using Base: NamedTuple, promote_eltype, inner_mapslices!
export RBFInterpolator
export RBFMachine, RBFMachineWithKernel, fit!, add_data!
export auto_grad, auto_jac, grad, jac, eval_and_auto_grad
export eval_and_auto_jac, eval_and_grad, eval_and_jac
Dependencies of this module:
using StaticPolynomials
using ThreadSafeDicts
using Memoization: @memoize
using StaticArrays
using LinearAlgebra: norm
using Lazy: @forward
using Parameters: @with_kw
for V in [:SizedVector, :MVector]
end
import Zygote
const Zyg = Zygote
using Zygote: Buffer
Radial Basis Function Models
The module RadialBasisFunctionModels
provides utilities to work with radial basis function [RBF] models. Given $N$ data sites $X = \{ x^1, …, x^N \} ⊂ ℝ^n$ and values $Y = \{ y^1, …, y^N \} ⊂ ℝ$, an interpolating RBF model $r\colon ℝ^n → ℝ$ has the form
\[r(x) = \sum_{i=1}^N w_i φ( \| x - x^i \|_2 ) + p(x),\]
where $p$ is a multivariate polynomial. The radial function $φ\colon [0, ∞) \to ℝ$ defines the RBF, and we can solve for the coefficients $w$ by solving the interpolation system
\[\begin{equation} r( x^i ) \stackrel{!}= y^i \quad \text{for all }i=1,…,N \label{eqn:coeff_basic} \end{equation}\]
See the section about Getting the Coefficients for how we actually solve the equation system.
Radial Basis Function Sum.
The function $k(•) = φ(\|•\|_2)$ is radially symmetric around the origin. $k$ is called the kernel of an RBF.
We define an abstract super type for radial functions:
abstract type RadialFunction <: Function end
Each type that inherits from `RadialFunction` should implement an evaluation method. It takes the radius/distance $ρ = ρ(x) = \| x - x^i \|$ from $x$ to a specific center $x^i$.
(φ :: RadialFunction )( ρ :: Real ) :: Real = Nothing;
We also need the so-called order of conditional positive definiteness:
cpd_order( φ :: RadialFunction) :: Int = nothing;
The derivative can also be specified. It defaults to
df( φ :: RadialFunction, ρ ) = Zyg.gradient( φ, ρ )[1]
df (generic function with 1 method)
The file `radial_funcs.jl` contains various radial function implementations.
Some Radial Functions
The Gaussian is defined by $φ(ρ) = \exp \left( - (αρ)^2 \right)$, where $α$ is a shape parameter to fine-tune the function.
"""
Gaussian( α = 1 ) <: RadialFunction
A `RadialFunction` with
```math
φ(ρ) = \\exp( - (α ρ)^2 ).
```
"""
@with_kw struct Gaussian{R<:Real} <: RadialFunction
α :: R = 1
@assert α > 0 "The shape parameter `α` must be positive."
end
function ( φ :: Gaussian )( ρ :: Real )
exp( - (φ.α * ρ)^2 )
end
cpd_order( :: Gaussian ) = 0
df(φ :: Gaussian, ρ :: Real) = - 2 * φ.α^2 * ρ * φ( ρ )
df (generic function with 2 methods)
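For instance (values invented for illustration), a Gaussian with shape parameter $α = 2$ evaluates like this:

```julia
φ = Gaussian(2.0)
φ(1.0)      # exp(-(2*1)^2) = exp(-4) ≈ 0.0183
df(φ, 1.0)  # -2 * 2^2 * 1 * φ(1.0) ≈ -0.1465
```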
The Multiquadric is $φ(ρ) = - \sqrt{ 1 + (αρ)^2 }$ and also has a positive shape parameter. We can actually generalize it to the following form:
"""
Multiquadric( α = 1, β = 1//2 ) <: RadialFunction
A `RadialFunction` with
```math
φ(ρ) = (-1)^{ \\lceil β \\rceil } ( 1 + (αρ)^2 )^β
```
"""
@with_kw struct Multiquadric{R<:Real,S<:Real} <: RadialFunction
α :: R = 1 # shape parameter
β :: S = 1//2 # exponent
@assert α > 0 "The shape parameter `α` must be positive."
@assert β % 1 != 0 "The exponent must not be an integer."
@assert β > 0 "The exponent must be positive."
end
function ( φ :: Multiquadric )( ρ :: Real )
(-1)^(ceil(Int, φ.β)) * ( 1 + (φ.α * ρ)^2 )^φ.β
end
cpd_order( φ :: Multiquadric ) = ceil( Int, φ.β )
df(φ :: Multiquadric, ρ :: Real ) = (-1)^(ceil(Int, φ.β)) * 2 * φ.α^2 * φ.β * ρ * ( 1 + (φ.α * ρ)^2 )^(φ.β - 1)
df (generic function with 3 methods)
Related is the Inverse Multiquadric $φ(ρ) = (1+(αρ)^2)^{-β}$:
"""
InverseMultiquadric( α = 1, β = 1//2 ) <: RadialFunction
A `RadialFunction` with
```math
φ(ρ) = ( 1 + (αρ)^2 )^{-β}
```
"""
@with_kw struct InverseMultiquadric{R<:Real,S<:Real} <: RadialFunction
α :: R = 1
β :: S = 1//2
@assert α > 0 "The shape parameter `α` must be positive."
@assert β > 0 "The exponent must be positive."
end
function ( φ :: InverseMultiquadric )( ρ :: Real )
( 1 + (φ.α * ρ)^2 )^(-φ.β)
end
cpd_order( :: InverseMultiquadric ) = 0
df(φ :: InverseMultiquadric, ρ :: Real ) = - 2 * φ.α^2 * φ.β * ρ * ( 1 + (φ.α * ρ)^2 )^(-φ.β - 1)
df (generic function with 4 methods)
The Cubic is $φ(ρ) = ρ^3$. It can also be generalized:
"""
Cubic( β = 3 ) <: RadialFunction
A `RadialFunction` with
```math
φ(ρ) = (-1)^{ \\lceil β/2 \\rceil } ρ^β
```
"""
@with_kw struct Cubic <: RadialFunction
β :: Int = 3
@assert β > 0 "The exponent `β` must be positive."
@assert β % 2 != 0 "The exponent `β` must not be an even number."
end
function ( φ :: Cubic )( ρ :: Real )
(-1)^ceil(Int, φ.β/2 ) * ρ^φ.β
end
cpd_order( φ :: Cubic ) = ceil( Int, φ.β/2 )
df(φ :: Cubic, ρ :: Real ) = (-1)^(ceil(Int, φ.β/2)) * φ.β * ρ^(φ.β - 1)
df (generic function with 5 methods)
The thin plate spline is usually defined via $φ(ρ) = ρ^2 \log( ρ )$. We provide a generalized version, which defaults to $φ(ρ) = - ρ^4 \log( ρ )$.
"""
ThinPlateSpline( k = 2 ) <: RadialFunction
A `RadialFunction` with
```math
φ(ρ) = (-1)^{k+1} ρ^{2k} \\log(ρ)
```
"""
@with_kw struct ThinPlateSpline <: RadialFunction
k :: Int = 2
@assert k > 0 && k % 1 == 0 "The parameter `k` must be a positive integer."
end
function (φ :: ThinPlateSpline )( ρ :: T ) where T<:Real
ρ == 0 ? zero(T) : (-1)^(φ.k+1) * ρ^(2*φ.k) * log( ρ )
end
cpd_order( φ :: ThinPlateSpline ) = φ.k + 1
df(φ :: ThinPlateSpline, ρ :: Real ) = ρ == 0 ? 0 : (-1)^(φ.k+1) * ρ^(2*φ.k - 1) * ( 2 * φ.k * log(ρ) + 1)
df (generic function with 6 methods)
The thin plate spline with $k = 1$ is not differentiable at $ρ = 0$, but we define the derivative as 0, which results in a continuous extension.
From a `RadialFunction` and a vector we can define a shifted kernel function.
const NumberOrVector = Union{<:Real, AbstractVector{<:Real}}
struct ShiftedKernel{RT <: RadialFunction, CT <: AbstractVector{<:Real}} <: Function
φ :: RT
c :: CT
end
norm2( vec ) = norm(vec, 2)
"Evaluate kernel `k` at `x - k.c`."
function (k::ShiftedKernel)( x :: AbstractVector{<:Real} )
return k.φ( norm2( x .- k.c ) )
end
Main.ShiftedKernel
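A quick example (center and arguments invented): a Gaussian kernel shifted to the center $(1, 1)$:

```julia
k = ShiftedKernel( Gaussian(), [1.0, 1.0] )
k( [1.0, 1.0] )  # distance 0, so φ(0) = 1.0
k( [2.0, 1.0] )  # distance 1, so exp(-1) ≈ 0.368
```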
A vector of $N$ kernels is a mapping $ℝ^n → ℝ^N, \ x ↦ [ k₁(x), …, k_N(x)]$.
_eval_vec_of_kernels( K, x ) = [k(x) for k ∈ K]
"Evaluate ``x ↦ [ k₁(x), …, k_{N_c}(x)]`` at `x`."
( K::AbstractVector{<:ShiftedKernel})( x ) = _eval_vec_of_kernels( K, x )
Base.AbstractVector
Suppose we have calculated the distances $\|x - x^i\|$ beforehand. We can save redundant effort by passing them to the radial functions of the kernels.
"Evaluate `k.φ` for distance `ρ` where `ρ` should equal `x - k.c` for the argument `x`."
eval_at_dist( k :: ShiftedKernel , ρ :: Real ) = k.φ(ρ)
"Evaluate ``x ↦ [ k₁(x), …, k_{N_c}(x)]``, provided the distances ``[ ρ_1(x), …, ρ_{N_c}(x) ]``."
function eval_at_dist( K::AbstractVector{<:ShiftedKernel}, dists :: AbstractVector{<:Real})
[ eval_at_dist(k,ρ) for (k,ρ) ∈ zip(K,dists) ]
end
Main.eval_at_dist
Provided we have solved the interpolation system, the weights for the radial basis function part of $r$ are $w$, where $w$ is a vector of length $N_c$ or a matrix in $ℝ^{N_c \times k}$, where $k$ is the number of outputs. We treat the general case $k \ge 1$ and always assume $w$ to be a matrix. (Note that we actually store the transpose of $w$.)
struct RBFSum{
KT <: AbstractVector{<:ShiftedKernel},
WT <: AbstractMatrix{<:Real}
}
kernels :: KT
weights :: WT # can be a normal matrix or a SMatrix
num_outputs :: Int
end
Make it display nicely:
function Base.show( io :: IO, rbf :: RBFSum{KT,WT} ) where {KT, WT}
compact = get(io, :compact, false)
if compact
print(io, "RBFSum{$(KT), $(WT)}")
else
n_out, n_kernels = size(rbf.weights)
print(io, "RBFSum\n")
print(io, "* with $(n_kernels) kernels in an array of type $(KT)\n")
print(io, "* and a $(n_out)x$(n_kernels) weight matrix of type $(WT).")
end
end
We can easily evaluate the `ℓ`-th output of the `RBFSum`:
@doc "Evaluate output `ℓ` of RBF sum `rbf::RBFSum`"
function (rbf :: RBFSum)(x :: AbstractVector, ℓ :: Int)
return (rbf.weights[ℓ,:]'rbf.kernels(x))[1]
end
@doc "Evaluate outputs `ℓ` of RBF sum `rbf::RBFSum`"
function (rbf :: RBFSum)(x :: AbstractVector, ℓ)
return rbf.weights[ℓ,:]*rbf.kernels(x)
end
Main.RBFSum
The overall output is a vector, and we also get it via matrix multiplication.
_eval_rbfsum(rbf::RBFSum, x ) = rbf.weights*rbf.kernels(x)
"Evaluate `rbf::RBFSum` at `x`."
(rbf :: RBFSum)( x :: AbstractVector ) = _eval_rbfsum(rbf, x)
Main.RBFSum
We want to return the right type and use `_type_guard`:
_type_guard( x , :: Type{<:Vector}, :: Int ) = convert( Vector, x)
for V in [:SVector, :MVector, :SizedVector ]
@eval _type_guard( x, ::Type{ <: $V }, n_out :: Int ) = convert($V{ n_out }, x)
end
(rbf :: RBFSum)( x :: Vector ) = _type_guard( _eval_rbfsum(rbf, x), Vector, rbf.num_outputs )
function (rbf :: RBFSum)( x :: T ) where T<:Union{SVector,MVector,SizedVector}
return _type_guard( _eval_rbfsum(rbf, x), T, rbf.num_outputs )
end
As before, we allow passing precalculated distance vectors:
function eval_at_dist( rbf::RBFSum, dists :: AbstractVector{<:Real}, ℓ :: Int )
rbf.weights[ℓ,:]'eval_at_dist( rbf.kernels, dists )
end
function eval_at_dist( rbf :: RBFSum, dists :: AbstractVector{<:Real})
vec(rbf.weights*eval_at_dist(rbf.kernels, dists ))
end
eval_at_dist (generic function with 4 methods)
For the polynomial tail we do something similar and use a `StaticPolynomials.PolynomialSystem` with a weight matrix. If the polynomial degree is < 0, we use an `EmptyPolySystem`:
"Drop-In Alternative to `StaticPolynomials.PolynomialSystem` when there are no outputs."
struct EmptyPolySystem{Nvars} end
Base.length(::EmptyPolySystem) = 0
StaticPolynomials.npolynomials(::EmptyPolySystem) = 0
"Evaluate for usual vector input. (Scalar input also supported, there are no checks)"
StaticPolynomials.evaluate(:: EmptyPolySystem, :: Union{R, Vector{R}}) where R<:Real = Int[]
"Evaluate for sized input."
StaticPolynomials.evaluate(:: EmptyPolySystem{Nvars}, :: StaticVector ) where {Nvars} = SVector{0,Int}()
(p :: EmptyPolySystem)( x :: NumberOrVector) = evaluate(p, x)
function StaticPolynomials.jacobian( :: EmptyPolySystem{Nvars}, args... ) where Nvars
Matrix{Int}(undef, 0, Nvars )
end
function StaticPolynomials.evaluate_and_jacobian( p :: EmptyPolySystem, args ... )
return p(args...), jacobian(p, args...)
end
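For illustration, the empty system returns zero-length values and a `0 × Nvars` jacobian:

```julia
p = EmptyPolySystem{2}()
p( [1.0, 2.0] )             # Int[]
jacobian( p, [1.0, 2.0] )   # 0×2 Matrix{Int}
```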
This allows for the `PolySum`. `polys` evaluates the polynomial basis and `weights` are determined during training/fitting.
struct PolySum{
PS <: Union{EmptyPolySystem, PolynomialSystem},
WT <: AbstractMatrix
}
polys :: PS
weights :: WT # n_out × n_polys matrix
num_outputs :: Int
function PolySum( polys :: PS, weights :: WT) where{PS, WT}
n_out, n_polys = size(weights)
@assert npolynomials(polys) == n_polys "Number of polynomials does not match."
new{PS,WT}(polys, weights, n_out)
end
end
eval_psum( p :: PolySum, x ) = p.weights * p.polys(x)
(p :: PolySum)(x :: AbstractVector ) = eval_psum( p, x )
(p :: PolySum)(x :: Vector ) = _type_guard(eval_psum(p,x), Vector, p.num_outputs )
(p :: PolySum)(x :: T) where T<:Union{SVector,MVector,SizedVector} = _type_guard( eval_psum(p,x), T, p.num_outputs)
(p :: PolySum)(x,ℓ::Int) = (p.weights[ℓ,:]'p.polys(x))[end]
(p :: PolySum)(x,ℓ) = p.weights[ℓ,:]*p.polys(x)
We now have all ingredients to define the model type.
"""
RBFModel{V, RS, PS, M}
* `V` is `true` by default. It can be set to `false` only if the number
of outputs is 1. Then scalars are returned.
"""
struct RBFModel{
V,
RS <: RBFSum,
PS <: PolySum,
M }
rbf :: RS
psum :: PS
# Information fields
num_vars :: Int
num_outputs :: Int
num_centers :: Int
meta :: M
function RBFModel( rbf :: RS, psum :: PS, num_vars, num_outputs, num_centers, meta :: M = nothing; vec_output :: Bool = true) where{RS,PS,M}
return new{vec_output, RS,PS, M}(rbf, psum, num_vars, num_outputs, num_centers, meta)
end
end
Main.RBFModel
We want a model to be displayed in a sensible way:
function Base.show( io :: IO, mod :: RBFModel{V,RS,PS,M} ) where {V,RS,PS,M}
compact = get(io, :compact, false)
if compact
print(io, "$(mod.num_vars)D$(mod.num_outputs)D-RBFModel{$(V)}")
else
print(io, "RBFModel{$(V),$(RS),$(PS)}\n")
if V
print(io, "\twith vector output ")
else
print(io, " with scalar output ")
end
print(io, "and $(mod.num_centers) centers, ")
print(io, "mapping from ℝ^$(mod.num_vars) to ℝ^$(mod.num_outputs).")
end
end
Evaluation is easy. We accept an additional `::Nothing` argument that does nothing for now, but saves some typing later.
function vec_eval(mod :: RBFModel, x :: AbstractVector{<:Real}, :: Nothing)
return mod.rbf(x) .+ mod.psum( x )
end
function scalar_eval(mod :: RBFModel, x :: AbstractVector{<:Real}, :: Nothing )
return (mod.rbf(x) + mod.psum( x ))[1]
end
# @doc "Evaluate model `mod :: RBFModel` at vector `x`."
( mod :: RBFModel{true, RS, PS, M} where {RS,PS,M} )(x :: AbstractVector{<:Real}, ℓ :: Nothing = nothing ) = vec_eval(mod,x,ℓ)
( mod :: RBFModel{false, RS, PS, M} where {RS,PS,M} )(x :: AbstractVector{<:Real}, ℓ :: Nothing = nothing ) = scalar_eval(mod,x,ℓ)
"Evaluate scalar output(s) `ℓ` of model `mod` at vector `x`."
function (mod :: RBFModel)( x :: AbstractVector{<:Real}, ℓ)
return mod.rbf(x, ℓ) + mod.psum( x, ℓ )
end
# scalar input
function (mod :: RBFModel)(x :: Real, ℓ = nothing )
@assert mod.num_vars == 1 "The model has more than 1 input variable. Provide a vector `x`, not a number."
mod( [x,], ℓ)
end
Derivatives
The easiest way to provide derivatives is via automatic differentiation. We have imported `Zygote` as `Zyg`. For automatic differentiation we need custom adjoints for some `StaticArrays`:
Zyg.@adjoint (T::Type{<:SizedMatrix})(x::AbstractMatrix) = T(x), dv -> (nothing, dv)
Zyg.@adjoint (T::Type{<:SizedVector})(x::AbstractVector) = T(x), dv -> (nothing, dv)
Zyg.@adjoint (T::Type{<:SArray})(x::AbstractArray) = T(x), dv -> (nothing, dv)
This allows us to define the following methods:
"Return the jacobian of `rbf` at `x` (using Zygote)."
function auto_jac( rbf :: RBFModel, x :: AbstractVector{<:Real} )
Zyg.jacobian( rbf, x )[1]
end
"Evaluate the model and return the jacobian at the same time."
function eval_and_auto_jac( rbf :: RBFModel, x :: AbstractVector{<:Real} )
y, back = Zyg._pullback( rbf, x )
T = eltype(y) # TODO does this make sense?
n = length(y)
jac = zeros(T, n, length(x) )
for i = 1 : n   # one pullback per output
e = [ zeros(T, i -1 ); T(1); zeros(T, n - i ) ]
jac[i, :] .= back(e)[2]
end
return y, jac
end
"Return gradient of output `ℓ` of model `rbf` at point `x` (using Zygote)."
function auto_grad( rbf :: RBFModel, x :: AbstractVector{<:Real}, ℓ :: Int = 1)
Zyg.gradient( χ -> rbf(χ, ℓ), x )[1]
end
"Evaluate output `ℓ` of the model and return the gradient."
function eval_and_auto_grad( rbf :: RBFModel, x :: AbstractVector{<:Real}, ℓ :: Int = 1 )
y, back = Zyg._pullback( χ -> rbf(χ, ℓ)[end], x)
grad = back( one(y) )[2]
return y, grad
end
Main.eval_and_auto_grad
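Hypothetical usage, assuming `mod` is a fitted two-input `RBFModel` (the constructor is defined further below):

```julia
x = rand(2)
J = auto_jac( mod, x )        # (num_outputs × 2)-jacobian via Zygote
y, J2 = eval_and_auto_jac( mod, x )
g = auto_grad( mod, x, 1 )    # gradient of output 1
```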
We need at least `ChainRules@v0.7.64` for `auto_grad` etc. to work with StaticArrays; see this issue. But we don't need `Zygote`, because we can derive the gradients ourselves. Assume that $φ$ is twice continuously differentiable.
What is the gradient of a scalar RBF model? Using the chain rule and $ξ = x - x^j$ we get
\[\dfrac{∂}{∂ξ_i} \left( φ(\| ξ \|) \right) = φ'( \| ξ \| ) \cdot \dfrac{∂}{∂ξ_i} ( \| ξ \| ) = φ'( \| ξ \| ) \cdot \dfrac{ξ_i}{\|ξ\|}.\]
The right term is always bounded, but not well defined for $ξ = 0$ (see [wild_diss] for details).
That is why we require $φ'(0) \stackrel{!}= 0$.
We have $\dfrac{∂}{∂x_i} ξ_i(x) = 1$ and thus
\[∇r(x) = \sum_{i=1}^N \frac{w_i φ'( \| x - x^i \| )}{\| x - x^i \|} (x - x^i) + ∇p(x)\]
We can then implement the formula from above. For a fixed center $x^i$ let $o$ be the distance vector $x - x^i$ and let $ρ$ be the norm $ρ = \|o\| = \| x- x^i \|$. Then, the gradient of a single kernel is:
function grad( k :: ShiftedKernel, o :: AbstractVector{<:Real}, ρ :: Real )
ρ == 0 ? zero(k.c) : (df( k.φ, ρ )/ρ) .* o
end
grad (generic function with 1 method)
In terms of `x`:
function grad( k :: ShiftedKernel, x :: AbstractVector{<:Real} )
o = x - k.c # offset vector
ρ = norm2( o ) # distance
return grad( k, o, ρ )
end
grad (generic function with 2 methods)
The jacobian of a vector of kernels follows suit:
function jacT( K :: AbstractVector{<:ShiftedKernel}, x :: AbstractVector{<:Real})
hcat( ( grad(k,x) for k ∈ K )... )
end
# precalculated offsets and distances, 1 per kernel
function jacT( K :: AbstractVector{<:ShiftedKernel}, offsets :: AbstractVector{<:AbstractVector}, dists :: AbstractVector{<:Real} )
hcat( ( grad(k,o,ρ) for (k,o,ρ) ∈ zip(K,offsets,dists) )... )
end
jac( K :: AbstractVector{<:ShiftedKernel}, args... ) = transpose( jacT(K, args...) )
jac (generic function with 1 method)
Hence, the gradients of an RBFSum are easy:
function grad( rbf :: RBFSum, x :: AbstractVector{<:Real}, ℓ :: Int = 1 )
#vec( jacT( rbf.kernels, x) * rbf.weights[:,ℓ] )
vec( rbf.weights[ℓ,:]'jac( rbf.kernels, x ) )
end
function grad( rbf :: RBFSum, offsets :: AbstractVector{<:AbstractVector}, dists :: AbstractVector{<:Real}, ℓ :: Int)
return vec( rbf.weights[ℓ,:]'jac( rbf.kernels, offsets, dists ) )
end
grad (generic function with 5 methods)
The `grad` method looks very similar for the `PolySum`. We obtain the jacobian of the polynomial basis system via `PolynomialSystem.jacobian`.
function grad( psum :: PolySum, x :: AbstractVector{<:Real} , ℓ :: Int = 1)
return vec( psum.weights[ℓ,:]'jacobian( psum.polys, x ))
end
grad (generic function with 7 methods)
For the `RBFModel` we simply combine both methods:
function _grad( mod :: RBFModel, x :: AbstractVector{<:Real}, ℓ :: Int = 1 )
return grad(mod.rbf, x, ℓ) + grad( mod.psum, x, ℓ )
end
function grad( mod :: RBFModel, x :: AbstractVector{<:Real}, ℓ :: Int = 1 )
return _grad(mod, x, ℓ)
end
grad( mod :: RBFModel, x :: Vector{<:Real}, ℓ :: Int = 1 ) = _type_guard( _grad(mod, x, ℓ), Vector, mod.num_vars )
function grad( mod :: RBFModel, x :: T, ℓ :: Int = 1 ) where T <: Union{SVector, MVector, SizedVector}
return _type_guard( _grad(mod, x, ℓ), T, mod.num_vars )
end
grad (generic function with 13 methods)
We can exploit our custom evaluation methods for "distances":
function _offsets_and_dists( rbf :: RBFSum, x :: AbstractVector{<:Real} )
offsets = [ x - k.c for k ∈ rbf.kernels ]
dists = norm2.(offsets)
return offsets, dists
end
function eval_and_grad( rbf :: RBFSum, offsets :: AbstractVector{<:AbstractVector}, dists :: AbstractVector{<:Real}, ℓ :: Int)
return eval_at_dist( rbf, dists, ℓ ), grad( rbf, offsets, dists, ℓ)
end
function eval_and_grad( rbf :: RBFSum, x :: AbstractVector{<:Real}, ℓ :: Int = 1)
offsets, dists = _offsets_and_dists(rbf, x)
return eval_and_grad( rbf, offsets, dists, ℓ)
end
eval_and_grad (generic function with 3 methods)
For the `PolySum` we use `evaluate_and_jacobian`.
function eval_and_grad( psum :: PolySum, x :: AbstractVector{<:Real}, ℓ :: Int = 1)
res_p, J_p = evaluate_and_jacobian( psum.polys, x )
return (psum.weights[ℓ,:]'res_p)[1], vec(psum.weights[ℓ,:]'J_p)
end
eval_and_grad (generic function with 5 methods)
Combine for `RBFModel`:
function eval_and_grad( mod :: RBFModel, x :: AbstractVector{<:Real}, ℓ :: Int = 1 )
res_rbf, g_rbf = eval_and_grad( mod.rbf, x, ℓ )
res_polys, g_polys = eval_and_grad( mod.psum, x, ℓ )
return res_rbf .+ res_polys, g_rbf .+ g_polys
end
eval_and_grad (generic function with 7 methods)
For the jacobian, we use the same trick to save evaluations.
function jac( rbf :: RBFSum, x :: AbstractVector{<:Real}, rows = nothing )
offsets, dists = _offsets_and_dists(rbf, x)
isnothing(rows) && return rbf.weights * jac( rbf.kernels, offsets, dists )
return rbf.weights[rows, :] * jac( rbf.kernels, offsets, dists )
end
jacT(rbf :: RBFSum, args... ) = transpose( jac(rbf, args...) )
function jac( psum :: PolySum, x :: AbstractVector{<:Real}, rows = nothing )
isnothing(rows) && return psum.weights * jacobian( psum.polys, x )
return psum.weights[rows, :] * jacobian( psum.polys, x )
end
function _jac( mod :: RBFModel, x :: AbstractVector{<:Real}, rows = nothing )
jac( mod.rbf, x, rows ) + jac( mod.psum, x, rows)
end
jac( mod :: RBFModel, x :: AbstractMatrix{<:Real}, rows = nothing ) = _jac(mod,vec(x),rows)
jac( mod :: RBFModel, x :: Vector{<:Real}, rows = nothing) = convert(Matrix, _jac(mod,x,rows))
jac( mod :: RBFModel, x :: SVector, rows = nothing) = convert( SMatrix{mod.num_outputs, mod.num_vars}, _jac(mod,x,rows) )
jac( mod :: RBFModel, x :: MVector, rows = nothing) = convert( MMatrix{mod.num_outputs, mod.num_vars}, _jac(mod,x,rows) )
jac( mod :: RBFModel, x :: SizedVector, rows = nothing) = convert( SizedMatrix{mod.num_outputs, mod.num_vars}, _jac(mod,x,rows) )
jac (generic function with 15 methods)
As before, define an "evaluate-and-jacobian" function that saves evaluations:
function eval_and_jac( rbf :: RBFSum, x :: AbstractVector{<:Real} )
offsets, dists = _offsets_and_dists(rbf, x)
res = eval_at_dist( rbf, dists )
J = rbf.weights * jac( rbf.kernels, offsets, dists )
return res, J
end
function eval_and_jac( psum :: PolySum, x :: AbstractVector{<:Real} )
res_p, J_p = evaluate_and_jacobian( psum.polys, x )
return vec( psum.weights * res_p ), psum.weights * J_p
end
function eval_and_jac( mod :: RBFModel, x :: AbstractVector{<:Real} )
res_rbf, J_rbf = eval_and_jac( mod.rbf, x )
res_polys, J_polys = eval_and_jac( mod.psum, x)
return res_rbf + res_polys, J_rbf + J_polys
end
# TODO type stable eval_and_grad and eval_and_jac ?
eval_and_jac (generic function with 3 methods)
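The analytic derivatives mirror the AD interface; again assuming a fitted `mod` as above (hypothetical usage):

```julia
g = grad( mod, x, 1 )             # analytic gradient of output 1
y, g2 = eval_and_grad( mod, x, 1 )
J = jac( mod, x )                 # analytic jacobian
y2, J2 = eval_and_jac( mod, x )
```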
Hessians are not yet implemented.
For the Hessian $Hr \colon ℝ^n \to ℝ^{n\times n}$ we need the gradients of the component functions
\[ ψ_j(ξ) = \frac{ φ'( \left\| ξ \right\| )}{\|ξ\|} ξ_j\]
Suppose $ξ ≠ 0$. First, using the product rule, we have
\[ \dfrac{∂}{∂ξ_i} \left( \frac{ φ'( \left\| ξ \right\| )}{\|ξ\|} ξ_j \right) = ξ_j \dfrac{∂}{∂ξ_i} \left( \frac{ φ'( \left\| ξ \right\| )}{\|ξ\|} \right) + \frac{ φ'( \left\| ξ \right\| )}{\|ξ\|} \dfrac{∂}{∂ξ_i} ξ_j\]
The last term is easy because of
\[\frac{∂}{∂ξ_i} ξ_j = \begin{cases} 1 & \text{if }i = j,\\ 0 & \text{else.} \end{cases}\]
For the first term we find
\[ \dfrac{∂}{∂ξ_i} \left( \frac{ φ'( \left\| ξ \right\| )} {\|ξ\|} \right) = \frac{ \|ξ\| \, ∂_i φ'\left( \left\| ξ \right\| \right) - φ'\left( \left\| ξ \right\| \right) ∂_i \|ξ\| }{ \|ξ\|^2 } = \frac{ φ''(\|ξ\|)\, ξ_i - \dfrac{φ'(\|ξ\|)}{\|ξ\|}\, ξ_i }{\|ξ\|^2} = \left( \frac{φ''(\|ξ\|)}{\|ξ\|^2} - \frac{φ'(\|ξ\|)}{\|ξ\|^3} \right) ξ_i\]
Hence, the gradient of $ψ_j$ is
\[ ∇ψ_j(ξ) = \left( \frac{φ''(\|ξ\|)}{\|ξ\|^2} - \frac{φ'(\|ξ\|)}{\|ξ\|^3} \right) ξ_j \, ξ + \frac{φ'(\|ξ\|)}{\|ξ\|} e^j,\]
where $e^j ∈ ℝ^n$ is all zeros, except $e^j_j = 1$. For $ξ = 0$ the first term vanishes due to L'Hôpital's rule:
\[∇ψ_j(0) = φ''(0) e^j.\]
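The derivation above translates directly into code. Below is a minimal sketch for the Hessian of a single kernel $x ↦ φ(\|x - x^i\|)$, assuming $φ$ is twice continuously differentiable with $φ'(0) = 0$; `ddf` and `hess` are hypothetical helpers and not part of the module:

```julia
using LinearAlgebra: I

# Hypothetical second derivative of a radial function, obtained via Zygote:
ddf( φ :: RadialFunction, ρ ) = Zyg.gradient( r -> df(φ, r), ρ )[1]

"Hessian of `x -> k.φ(norm(x - k.c))` at `x` (sketch only)."
function hess( k :: ShiftedKernel, x :: AbstractVector{<:Real} )
    o = x - k.c            # offset ξ = x - xⁱ
    ρ = norm2(o)
    n = length(x)
    # at ξ = 0 only the diagonal term survives: ∇ψ_j(0) = φ''(0) eʲ
    ρ == 0 && return ddf(k.φ, 0.0) .* Matrix(I, n, n)
    dφ = df(k.φ, ρ)
    return ( ddf(k.φ, ρ)/ρ^2 - dφ/ρ^3 ) .* (o * o') .+ (dφ/ρ) .* Matrix(I, n, n)
end
```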
Getting the Coefficients
const VecOfVecs{T} = AbstractVector{<:AbstractVector}
AbstractVector{<:AbstractVector} (alias for AbstractArray{<:AbstractArray{T, 1} where T, 1})
Polynomial Basis
Any constructor of an `RBFModel` must solve for the coefficients in $\eqref{eqn:coeff_basic}$. To build the equation system, we need a basis $\{p_j\}_{1 \le j \le Q}$ of $Π_d(ℝ^n)$. For the interpolation system to be solvable we have to choose the right polynomial space for $p$. Basically, if the RBF kernel (or the radial function) is conditionally positive definite of order $D$, we have to find a polynomial $p$ with $\deg p \ge D-1$.[wendland] If the kernel is CPD of order $D=0$, we do not have to add a polynomial and can interpolate arbitrary (distinct) data points.
The canonical basis is $x_1^{α_1} x_2^{α_2} … x_n^{α_n}$ with $α_i ≥ 0$ and $Σ_i α_i ≤ d$. For $\bar{d} \le d$ we can recursively get the non-negative integer solutions for $Σ_i α_i = \bar{d}$ with the following function:
@doc """
non_negative_solutions( d :: Int, n :: Int)
Return a matrix with columns that correspond to solution vectors
``[x_1, …, x_n]`` to the equation ``x_1 + … + x_n = d``,
where the variables are non-negative integers.
"""
function non_negative_solutions( d :: Int, n :: Int ) :: Matrix{Int}
if n == 1
return fill(d,1,1)
else
num_sols = binomial( d + n - 1, n - 1)
sol_matrix = Matrix{Int}(undef, n, num_sols)
j = 1
for i = 0 : d
# find all solutions of length `n-1` that sum to `i`
# if we add `d-i` to each column, then each column
# has `n` elements and sums to `d`
padded_shorter_solutions = vcat( d-i, non_negative_solutions(i, n-1) )
num_shorter_sols = size( padded_shorter_solutions, 2 )
sol_matrix[:, j : j + num_shorter_sols - 1] .= padded_shorter_solutions
j += num_shorter_sols
end
return sol_matrix
end
end
Main.non_negative_solutions
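For example, the solutions of $x_1 + x_2 = 2$:

```julia
non_negative_solutions( 2, 2 )
# 2×3 Matrix{Int64}:
#  2  1  0
#  0  1  2
```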
The polynomial basis exponents are then given by all possible $\bar{d} \le d$:
@doc """
non_negative_solutions_ineq( d :: Int, n :: Int)
Return a matrix with columns that correspond to solution vectors
``[x_1, …, x_n]`` to the equation ``x_1 + … + x_n <= d``,
where the variables are non-negative integers.
"""
@memoize ThreadSafeDict function non_negative_solutions_ineq( d :: Int, n :: Int ) :: Matrix{Int}
no_cols = binomial( n + d, n)
ret_mat = Matrix{Int}(undef, n, no_cols )
i = 1
for d̄ = 0 : d
sub_mat = non_negative_solutions( d̄, n)
ret_mat[:, i:i + size(sub_mat,2) - 1] .= sub_mat[:,:]
i += size(sub_mat, 2)
end
return ret_mat
end
Main.non_negative_solutions_ineq
I did an unnecessary rewrite of `non_negative_solutions` to be Zygote-compatible; hence the matrices etc. `Combinatorics` has `multiexponents`, which should do the same...
We don't use `DynamicPolynomials.jl` to generate the polynomials anymore. Zygote did overflow when there were calculations with those polynomials. This is not a problem for calculating the basis (because we are `ignore()`-ing the basis calculation now, assuming we never want to differentiate with respect to `n, d`), but it was when constructing the outputs from them. Instead, we directly construct `StaticPolynomial`s and define a `PolynomialSystem` that evaluates all basis polynomials.
@doc """
canonical_basis( n:: Int, d :: Int ) :: Union{PolynomialSystem, EmptyPolySystem}
Return the canonical basis of the space of `n`-variate
polynomials of degree at most `d`.
"""
function canonical_basis(n :: Int, d::Int, OneType :: Type = Float64)
if d < 0
return EmptyPolySystem{n}()
else
exponent_matrix = non_negative_solutions_ineq( d, n )
one_float = OneType(1) # `one_float` is used as coefficient(s) to guarantee floating point output
return PolynomialSystem(
( Polynomial( [one_float,], e[:,:] ) for e ∈ eachcol(exponent_matrix) )...
)
end
end
Main.canonical_basis
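For example, the canonical basis for bivariate polynomials of degree at most 1 consists of the constant $1$ and the coordinate functions $x_1, x_2$:

```julia
basis = canonical_basis( 2, 1 )
basis( [1.0, 2.0] )   # [1.0, 1.0, 2.0]
```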
Solving the Equation System
Now let $\{p_j\}_{1\le j\le Q}$ be a basis of the polynomial space. Set $P = [ p_j(x^i) ] ∈ ℝ^{N × Q}$ and $Φ = [ φ(\| x^i - x^j \|) ] ∈ ℝ^{N × N}$. In case of interpolation, the linear equation system for the coefficients of $r$ is
\[S c := \begin{equation} \begin{bmatrix} Φ & P \\ P^T & 0_{Q × Q} \end{bmatrix} \begin{bmatrix} w \\ λ \end{bmatrix} \stackrel{!}= \begin{bmatrix} Y \\ 0_Q \end{bmatrix}. \tag{I} \label{eqn:coeff} \end{equation}\]
We can also use differing feature vectors and centers. $Φ$ then becomes $Φ = [k_j(x^i)]_{1\le i \le N_d, 1\le j \le N_c} = [φ(‖ x^i - ξ^j ‖)]$, where we denote the number of kernel centers by $N_c$ and the number of features ($d$ata sites) by $N_d$. In the overdetermined least squares case (with pair-wise different centers and pair-wise different features), we do away with the second row of equations in \eqref{eqn:coeff}. The solution $c = [w, λ]^T$ is then given by the Moore-Penrose pseudo-inverse:
\[ c = \underbrace{ ( S^T S )^{-1} S^T}_{=S^\dagger} \begin{bmatrix} Y \\ 0_Q \end{bmatrix}.\]
Julia automatically computes the least-squares solution via `S\RHS`.
When we have vector data $Y ⊂ ℝ^k$, e.g. from modelling MIMO functions, Julia easily allows for multiple columns in the right-hand side of the interpolation equation system, and we get weight vectors for multiple models, which can be thought of as one vector model $r\colon ℝ^n \to ℝ^k$.
Some helpers to build the matrices:
"Provided an array of sites with `n` variables, return a polynomial basis system of degree `poly_deg`."
function make_polys(sites, poly_deg = 1)
poly_precision = promote_type(Float16, inner_type(sites))
poly_basis_sys = Zyg.ignore() do
canonical_basis( length(sites[1]), poly_deg, poly_precision )
end
return poly_basis_sys
end
function make_kernel( φ :: RadialFunction, center :: AbstractVector{<:Real})
return ShiftedKernel(φ,center)
end
"Return array of `ShiftedKernel`s based functions in `φ_arr` with centers from `centers`."
function make_kernels( φ_arr :: AbstractVector{<:RadialFunction}, centers :: VecOfVecs )
@assert length(φ_arr) == length(centers)
return [ make_kernel(φ,c) for (φ,c) ∈ zip( φ_arr, centers) ]
end
"Return array of `ShiftedKernel`s based function `φ` with centers from `centers`."
function make_kernels( φ :: RadialFunction, centers :: VecOfVecs )
return [ make_kernel(φ,c) for c ∈ centers ]
end
"Return RBF basis matrix by applying each kernel to each site. Kernels vary with the columns."
function _rbf_matrix( kernels, sites )
return transpose( hcat( map(kernels, sites)... ) )
end
"Return polynomial basis matrix by applying each basis polynomial to each site. Polynomials vary with the columns."
function _poly_matrix( polys, sites )
return transpose( hcat( map(polys, sites)... ) )
end
"Return RBF and Polynomial basis matrices as well as the vector of kernels and the polynomial basis system."
function get_matrices( φ, sites, centers = sites; poly_deg = 1 )
kernels = make_kernels( φ, centers )
polys = make_polys( sites, poly_deg )
return _rbf_matrix( kernels, sites ), _poly_matrix( polys, sites ), kernels, polys
end
Main.get_matrices
Later, the constructor will call the above helper functions and then use these methods to retrieve the coefficients:
@doc """
coefficients(sites, values, kernels, rad_funcs, polys )
Return the coefficient matrices `w` and `λ` for an rbf model
``r(x) = Σ_{i=1}^N wᵢ φ(\\|x - x^i\\|) + Σ_{j=1}^M λⱼ pⱼ(x)``,
where ``N`` is the number of centers and ``M``
is the number of n-variate basis polynomials.
"""
function coefficients( Φ, P, values; mode :: Symbol = :ls)
N_d, N_c = size( Φ )
N_dp, Q = size( P )
@assert N_d == N_dp "Row count of RBF and Poly basis matrices does not match."
if N_d < N_c
error("Underdetermined models not supported yet.")
end
if N_d < Q
error("Too few data sites for selectod polynomial degree. (Need at least $(Q).)")
end
# system matrix S and right hand side
S = [Φ P]
RHS = transpose( hcat(values... ) )
return _coefficients( Φ, P, S, RHS, Val(mode) )
end
Main.coefficients
The actual work is delegated to `_coefficients`.
function _coefficients( Φ, P, S, RHS, ::Val{:ls} )
N_c = size(Φ,2); Q = size(P,2);
coeff = S \ RHS
return _coeff_matrices(coeff, S, RHS, N_c, Q )
end
# interpolation requires zero padding
function _coefficients( Φ, P, S, RHS, ::Val{:interpolation} )
N_d, N_c = size(Φ); Q = size(P,2);
@assert N_d == N_c "Interpolation requires same number of features and centers." # TODO remove assertion
S̃ = [ S ; # N_d × (N_c + Q)
P' zeros(eltype(S), Q, Q )] # Q × N_d and Q × Q
RHS_padded = [ RHS;
zeros( eltype(RHS), Q, size(RHS,2) ) ];
coeff = S̃ \ RHS_padded
return _coeff_matrices( coeff, S̃, RHS_padded, N_c, Q )
end
# treat sized arrays in a special way, so that zero padding preserves the types
# can potentially be removed once https://github.com/JuliaArrays/StaticArrays.jl/issues/856 is resolved
function _coefficients( Φ, P, S :: StaticMatrix, RHS :: StaticMatrix, ::Val{:interpolation} )
N_d, N_c = size(Φ); Q = size(P,2);
@assert N_d == N_c "Interpolation requires same number of features and centers." # TODO remove assertion
S̃ = [S ;
P' @SMatrix(zeros(eltype(S),Q,Q)) ];
RHS_padded = [ RHS;
@SMatrix(zeros(eltype(RHS), Q ,size(RHS,2)))];
coeff = S̃ \ RHS_padded
return _coeff_matrices( coeff, S̃, RHS_padded, N_c, Q )
end
# this method returns random coefficients
function _coefficients( Φ, P, S, RHS, ::Val{:rand} )
F = Base.promote_eltype(S, RHS)
N_c = size(Φ,2); Q = size(P,2)
m = N_c + Q
k = size(RHS, 2)
return _coeff_matrices(rand( F, m, k ), S, RHS, N_c, Q )
end
_coefficients (generic function with 4 methods)
We want statically sized matrices when appropriate; that is why we call `_coeff_matrices`:
"Return rbf weights, polynomial regressor weights, system matrix and right hand side of the RBF equations."
function _coeff_matrices(coeff :: AbstractMatrix, S, RHS, N_c, Q )
return view(coeff, 1 : N_c, :), view(coeff, N_c + 1 : N_c + Q, :), S, RHS
end
# index into `coeff` using static array so that the results are sized, too
function _coeff_matrices(coeff :: StaticMatrix, S, RHS, N_c, Q )
return coeff[ SVector{N_c}(1 : N_c), :], coeff[ SVector{Q}( N_c + 1 : N_c + Q ), :], S, RHS
end
_coeff_matrices (generic function with 2 methods)
We can easily impose linear equality constraints, for example requiring interpolation only on a subset of features. In matrix form, $I$ linear equality constraints (for $k$ outputs) can be written as
\[E c = b, \quad E ∈ ℝ^{I×(N_c + Q)}, b ∈ ℝ^{I\times k},\, I,k ∈ ℕ_0.\]
Now, let $ĉ$ be the least squares solution from above. The constrained solution is
\[ c = ĉ - Ψ E^T ( E Ψ E^T)^{-1} ( E ĉ - b ), \; Ψ := (S^T S)^{-1} \tag{CLS1} \label{eqn:cls1}\]
This results from forming the Lagrangian of an equivalent minimization problem. Let $δ = ĉ - c ∈ ℝ^{q\times k}, q = N_c + Q,$ and define the constraint residuals as $γ = Eĉ - b ∈ ℝ^{I\times k}$. The Lagrangian for minimizing $δ^TS^TSδ$ under $Eδ=γ$ is
\[\begin{aligned} L &= δ^T S^T S δ + 2 λ^T( E δ - γ )\\ D_δL &= 2 δ^T S^T S + 2λ^T E \\ D_λL &= 2 δ^T E^T - 2 γ^T \end{aligned}\]
Setting the derivatives to zero leads to \eqref{eqn:cls1} via
\[ \begin{bmatrix} S^T S & E^T \\ E & 0_{I\times I} \end{bmatrix} \begin{bmatrix} δ \\ λ \end{bmatrix} = \begin{bmatrix} 0_{q\times k} \\ γ \end{bmatrix} \tag{L} \label{eqn:cls2}\]
See [adv_eco] for details.
function constrained_coefficients(
w :: AbstractMatrix{<:Real},
λ :: AbstractMatrix{<:Real},
S :: AbstractMatrix{<:Real},
E :: AbstractMatrix{<:Real},
b :: AbstractMatrix{<:Real}
)
# Using Lagrangian approach:
ĉ = [w; λ] # least squares solution
γ = E*ĉ - b # constraint residuals
I, q = size(E)
k = size(w,2)
A = vcat(
[S'S E'],
[E zeros(Int,I,I)]
)
RHS = [
zeros(Int, q, k);
γ
]
δλ = A \ RHS
δ = δλ[1 : q, :]
c = ĉ - δ # coefficients for constrained problem
N_c = size(w,1)
return c[1 : N_c, :], c[N_c+1:end, :]
end
constrained_coefficients (generic function with 1 method)
For the case mentioned above, that is, interpolation at a subset of sites, we can easily build the $E$ matrix from the $S$ matrix by taking the corresponding rows.
function constrained_coefficients(
w :: AbstractMatrix{<:Real},
λ :: AbstractMatrix{<:Real},
S :: AbstractMatrix{<:Real},
RHS_ls :: AbstractMatrix{<:Real},
interpolation_indices :: AbstractVector{Int}
)
E = S[interpolation_indices, :]
b = RHS_ls[interpolation_indices, :]
return constrained_coefficients( w, λ, S, E, b )
end
constrained_coefficients (generic function with 2 methods)
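A runnable sketch (data invented for illustration): a least-squares fit with five 1D sites and two centers, after which interpolation is enforced at the first site only:

```julia
sites   = [ [0.0], [1.0], [2.0], [3.0], [4.0] ]
values  = [ [0.0], [1.0], [4.0], [9.0], [16.0] ]   # samples of x ↦ x²
centers = [ [0.5], [2.5] ]
Φ, P, kernels, polys = get_matrices( Gaussian(), sites, centers; poly_deg = 1 )
w, λ, S, RHS = coefficients( Φ, P, values; mode = :ls )
w_c, λ_c = constrained_coefficients( w, λ, S, RHS, [1] )   # exact at site 1
```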
The Actual, Usable Constructor
We want the user to be able to pass 1D data as scalars, so we use the following helpers:
ensure_vec_of_vecs( before :: AbstractVector{<:AbstractVector{<:Real}} ) = before
ensure_vec_of_vecs( before :: AbstractVector{ <:Real }) = [[x,] for x in before ]
function inner_type( vec_of_vecs :: AbstractVector{<:AbstractVector{T}}) where T
if Base.isabstracttype(T) # like Any if data is of mixed precision
return Float64
else
return T
end
end
function _check_data(features,labels,centers; type_checks = true)
# Basic Data integrity checks
@assert !isempty(features) "Provide at least 1 feature vector."
@assert !isempty(labels) "Provide at least 1 label vector."
num_vars = length(features[1])
num_outputs = length(labels[1])
@assert all( length(s) == num_vars for s ∈ features ) "All features must have same dimension."
@assert all( length(v) == num_outputs for v ∈ labels ) "All labels must have same dimension."
num_sites = length(features)
num_vals = length(labels)
@assert num_sites == num_vals "Provide as many features as labels."
if type_checks
sites = ensure_vec_of_vecs(features)
values = ensure_vec_of_vecs(labels)
if !isempty(centers)
@assert all( length(c) == num_vars for c ∈ centers ) "All centers must have dimension $(num_vars)."
C = ensure_vec_of_vecs(centers)
else
C = sites
end
else
sites = features
values = labels
C = centers
end
num_centers = length(C)
return num_vars, num_outputs, num_sites, num_vals, num_centers, sites, values, C
end
_check_data (generic function with 1 method)
We now have all ingredients for the basic outer constructor:
@doc """
RBFModel( features, labels, φ = Multiquadric(), poly_deg = 1; kwargs ... )
Construct a `RBFModel` from the feature vectors in `features` and
the corresponding labels in `labels`, where `φ` is a `RadialFunction` or a vector of
`RadialFunction`s.\n
Scalar data can be used, it is transformed internally. \n
StaticArrays can be used, e.g., `features :: Vector{<:SVector}`.
Providing `SVector`s only might speed up the construction.\n
If the degree of the polynomial tail, `poly_deg`, is too small it will be set to `cpd_order(φ)-1`.
If the RBF centers do not equal the `features`, you can use the keyword argument `centers` to
pass a list of centers. If `φ` is a vector, then the length of `centers` and `φ` must be equal and
`centers[i]` will be used in conjunction with `φ[i]` to build a `ShiftedKernel`. \n
If `features` has 1D data, the output of the model will be a 1D-vector.
If it should be a scalar instead, set the keyword argument `vector_output` to `false`.
"""
function RBFModel(
features :: AbstractVector{ <:NumberOrVector },
labels :: AbstractVector{ <:NumberOrVector },
φ :: Union{RadialFunction, AbstractVector{<:RadialFunction}} = Multiquadric(),
poly_deg :: Int = 1;
centers :: AbstractVector{ <:NumberOrVector } = Vector{Float16}[],
interpolation_indices :: AbstractVector{ <: Int } = Int[],
vector_output :: Bool = true,
coeff_mode :: Symbol = :auto,
save_matrices :: Bool = true
)
num_vars, num_outputs, num_sites, num_vals, num_centers, sites, values, C = _check_data( features, labels, centers )
if coeff_mode == :auto
can_interpolate_uniquely = φ isa RadialFunction ? poly_deg >= cpd_order(φ) - 1 : all( poly_deg >= cpd_order(phi) - 1 for phi in φ )
coeff_mode = num_sites == num_centers && can_interpolate_uniquely ? :interpolation : :ls
end
# check if there are enough sites for polynomial interpolation
required_sites = binomial( num_vars + poly_deg, num_vars )
if num_sites < required_sites
for pd = poly_deg-1:-1:-1
if binomial( num_vars + pd, num_vars ) <= num_sites
@warn "There are not enough sites for the selected polynomial degree $(poly_deg), using $(pd) instead."
poly_deg = pd
break
end
end
end
Φ, P, kernels, poly_basis_sys = get_matrices( φ, sites, C; poly_deg )
w, λ, S, RHS = coefficients( Φ, P, values; mode = coeff_mode )
meta = save_matrices ? (rbf_mat = Φ, poly_mat = P) : nothing
if !isempty(interpolation_indices)
w, λ = constrained_coefficients( w, λ, S, RHS, interpolation_indices)
end
# build output polynomials
poly_sum = PolySum( poly_basis_sys, transpose(λ) )
# build RBF system
rbf_sys = RBFSum(kernels, transpose(w), num_outputs)
# vector output? (dismiss user choice if labels are vectors)
vec_output = num_outputs == 1 ? vector_output : true
return RBFModel(rbf_sys, poly_sum, num_vars, num_outputs, num_centers, meta; vec_output)
end
Main.RBFModel
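A minimal usage sketch of this constructor (data invented for illustration):

```julia
features = [ rand(2) for _ = 1 : 10 ]
labels   = [ [ sum(x), prod(x) ] for x in features ]
mod = RBFModel( features, labels, Gaussian(), 1 )
mod( features[1] ) ≈ labels[1]    # true: interpolation up to numerical error
grad( mod, features[1], 2 )       # gradient of the second output
```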
Special Constructors
We offer some specialized models (that simply wrap the main type).
struct RBFInterpolationModel
model :: RBFModel
end
(mod :: RBFInterpolationModel)(args...) = mod.model(args...)
@forward RBFInterpolationModel.model grad, jac, jacT, auto_grad, auto_jac
The constructor is a tiny bit simpler and additional checks take place:
"""
RBFInterpolationModel(features, labels, φ, poly_deg; kwargs… )
Build a model interpolating the feature-label pairs.
Does not accept the `centers` keyword argument.
"""
function RBFInterpolationModel(
features :: AbstractVector{ <:NumberOrVector },
labels :: AbstractVector{ <:NumberOrVector },
φ :: Union{RadialFunction,AbstractVector{<:RadialFunction}} = Multiquadric(),
poly_deg :: Int = 1;
vector_output :: Bool = true,
save_matrices :: Bool = true
)
@assert length(features) == length(labels) "Provide as many features as labels!"
if poly_deg < cpd_order(φ) - 1
@warn "Polyonmial degree too small for interpolation. Using $(cpd_order(φ)-1)."
poly_deg = max( poly_deg, cpd_order(φ) - 1 )
end
mod = RBFModel(features, labels, φ, poly_deg; vector_output, save_matrices, coeff_mode = :interpolation)
return RBFInterpolationModel( mod )
end
Main.RBFInterpolationModel
We want to provide a convenient alternative constructor for interpolation models so that the radial function can be defined by passing a `Symbol` or `String`.
const SymbolToRadialConstructor = NamedTuple((
:gaussian => Gaussian,
:multiquadric => Multiquadric,
:inv_multiquadric => InverseMultiquadric,
:cubic => Cubic,
:thin_plate_spline => ThinPlateSpline
))
"Obtain a `RadialFunction` from its name and constructor arguments."
function _get_rad_func( φ_symb :: Union{Symbol, String}, φ_args )
# which radial function to use?
radial_symb = Symbol( lowercase( string( φ_symb ) ) )
if !(radial_symb ∈ keys(SymbolToRadialConstructor))
@warn "Radial Funtion $(radial_symb) not known, using Gaussian."
radial_symb = :gaussian
φ_args = nothing
end
constructor = SymbolToRadialConstructor[radial_symb]
if isnothing(φ_args)
φ = constructor()
else
φ = constructor( φ_args... )
end
return φ
end
Main._get_rad_func
The alternative constructors are built programmatically:
for op ∈ [ :RBFInterpolationModel, :RBFModel ]
@eval begin
function $op(
features :: AbstractVector{ <:NumberOrVector },
labels :: AbstractVector{ <:NumberOrVector },
φ_symb :: Union{Symbol, String},
φ_args = nothing,
poly_deg :: Int = 1; kwargs...
)
φ = _get_rad_func( φ_symb, φ_args )
return $op(features, labels, φ, poly_deg; kwargs... )
end
end
end
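For example, reusing `features` and `labels` from the sketch above (the shape parameter is invented):

```julia
mod = RBFInterpolationModel( features, labels, :gaussian, (2.0,) )
```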
Container with Training Data
The RBF machine is similar in design to an MLJ machine: training data (feature and label vectors) are stored and can be added. The inner model is trained with `fit!`.
TODO: In the future, we can customize the `fit!` method when updating a model to only consider new training data. This would also make type conversion of the whole data arrays unnecessary.
"""
RBFMachineWithKernel(; φ, features, labels, poly_deg)
A container holding an inner `model :: RBFModel` (or `model == nothing`).
The inner model interpolates the data after calling `fit!`.
An array of arrays of features is stored in the `features` field.
Likewise for `labels`.
After `add_data!`, the model has to be fitted again, indicated by its field `is_valid`.
In contrast to the more general `RBFModel`, only one single RBF function (i.e., one set of shape parameters)
should be used.
"""
@with_kw mutable struct RBFMachineWithKernel{
FT <: AbstractVector{<:AbstractVector{<:AbstractFloat}},
LT <: AbstractVector{<:AbstractVector{<:AbstractFloat}},
R <: RadialFunction,
}
φ :: R = Gaussian()
features :: FT = Vector{Float64}[]
labels :: LT = Vector{Float64}[]
poly_deg :: Int = 1
model :: Union{Nothing, RBFModel} = nothing
valid :: Bool = false # is model trained on all data sites?
function RBFMachineWithKernel{FT,LT,R}(φ::R, features::FT, labels::LT, poly_deg, model, valid ) where{R,FT,LT}
@assert length(features) == length(labels) "Need same number of `features` and `labels`."
features_copied = copy(features)
labels_copied = copy(labels)
return new{FT,LT,R}(φ,features_copied, labels_copied, poly_deg, model, valid )
end
end
function RBFMachine(; kernel_name :: Symbol = :gaussian, kernel_args = nothing, poly_deg = 1,
features :: FT = Vector{Float64}[], labels :: LT = Vector{Float64}[], kwargs... ) where {FT, LT}
T = promote_type(eltype(eltype(FT)), eltype(eltype(LT)))
KARGS = isnothing(kernel_args) ? nothing : T.(kernel_args)
φ = _get_rad_func( kernel_name, KARGS )
@assert poly_deg >= cpd_order(φ) - 1 "Polynomial degree too low for interpolation."
return RBFMachineWithKernel(; φ, poly_deg, labels, features, kwargs...)
end
"Return floating point type of training data elements."
_precision( :: RBFMachineWithKernel{FT,LT} ) where {FT,LT} = eltype( Base.promote_eltype(FT, LT) )
"Fit `mach :: RBFMachineWithKernel` to the training data."
function fit!( mach :: RBFMachineWithKernel )::Nothing
@assert length(mach.features) > 0 "Cannot `fit!` without data."
@assert length(mach.features) == length(mach.labels) "Interpolation requires same number of features and labels."
num_needed = binomial( mach.poly_deg + length(mach.features[1]), mach.poly_deg)
@assert length(mach.features) >= num_needed "Too few data sites for selected polynomial degree (need $(num_needed))."
mach.model = RBFModel( mach.features, mach.labels, mach.φ, mach.poly_deg; save_matrices = true )
mach.valid = true
return nothing
end
Main.fit!
Forward evaluation methods of inner model:
( mach :: RBFMachineWithKernel )(args...) = mach.model(args...)
@forward RBFMachineWithKernel.model grad, jac, jacT, auto_grad, auto_jac
Methods to add features and labels:
"Add a feature vector(s) and a label(s) to the `machine` container."
function add_data!(
m :: RBFMachineWithKernel, features :: AbstractVector{<:AbstractVector}, labels :: AbstractVector{<:AbstractVector}
) :: Nothing
@assert length(features) == length(labels) "Provide same number of features and labels."
@assert all( length(f) == length(features[1]) for f in features ) "Features must have same length."
@assert all( length(l) == length(labels[1]) for l in labels ) "Labels must have same length."
@assert isempty(m.features) || length(m.features[1]) == length(features[1]) && length(m.labels[1]) == length(labels[1]) "Length doesn't match previous data."
append!(m.features, features)
append!(m.labels, labels)
m.valid = false
return nothing
end
function add_data!(
m :: RBFMachineWithKernel, feature :: AbstractVector{<:AbstractFloat}, label:: AbstractVector{<:AbstractFloat}
) :: Nothing
return add_data!(m, [ feature, ], [label, ])
end
add_data! (generic function with 2 methods)
Convenience methods to "reset" a machine:
function Base.empty!( m :: RBFMachineWithKernel ) :: Nothing
empty!(m.features)
empty!(m.labels)
m.model = nothing
m.valid = false
return nothing
end
function Base.isempty(m :: RBFMachineWithKernel ) :: Bool
isempty( m.features ) && isempty( m.labels ) && isnothing(m.model)
end
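A short usage sketch of the machine interface (data invented for illustration):

```julia
mach = RBFMachine(; kernel_name = :cubic )
add_data!( mach, [ rand(2) for _ = 1 : 5 ], [ rand(1) for _ = 1 : 5 ] )
fit!( mach )
mach( rand(2) )   # evaluate the fitted inner model
empty!( mach )    # reset the machine
```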
include("mlj_interface.jl")
This page was generated using Literate.jl.