
Commit

Updated interface to support abstract types
boborbt committed Apr 9, 2024
1 parent 4afd14a commit b1ebd22
Showing 6 changed files with 51 additions and 26 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PartitionedLS"
uuid = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f"
authors = ["Roberto Esposito <[email protected]>"]
version = "1.0.2"
version = "1.0.3"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
24 changes: 12 additions & 12 deletions src/PartitionedLS.jl
@@ -30,16 +30,16 @@ struct PartLSFitResult
The values of the α variables. For each partition ``k``, the α variables satisfy
``\\sum_{i \\in P_k} \\alpha_{i} = 1``.
"""
α::Vector{Float64}
α::Vector{AbstractFloat}
"""
The values of the β variables. For each partition ``k``, ``\\beta_k`` is the coefficient that multiplies the
features in the k-th partition.
"""
β::Vector{Float64}
β::Vector{AbstractFloat}
"""
The intercept term of the model.
"""
t::Float64
t::(AbstractFloat)
"""
The partition matrix. It is a binary matrix where each row corresponds to a partition and each column
corresponds to a feature. The element ``P_{k, i} = 1`` if feature ``i`` belongs to partition ``k``.
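As a reading aid, a minimal sketch of how these fields fit together; the numeric values are invented for illustration and are not taken from the package:

```julia
# Hypothetical values for a 3-feature problem split into 2 partitions.
P = [1 0;    # feature 1 → partition 1
     1 0;    # feature 2 → partition 1
     0 1]    # feature 3 → partition 2

α = [0.3, 0.7, 1.0]   # per-feature weights (the α field)
β = [2.5, -1.2]       # one coefficient per partition (the β field)
t = 0.1               # intercept (the t field)

# Each partition's α entries sum to one, as the docstring above states.
for k in 1:size(P, 2)
    @assert sum(α[P[:, k] .== 1]) ≈ 1.0
end
```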
@@ -71,7 +71,7 @@ homogeneous version of X and Po is the homogeneous version of P.
- `Xo`: the homogeneous version of X
- `Po`: the homogeneous version of P
"""
function homogeneousCoords(X, P::Array{Int,2})
function homogeneousCoords(X::Matrix{T}, P::Matrix{Int})::Tuple{Matrix{T}, Matrix{Int}} where T
Xo = hcat(X, ones(size(X, 1), 1))
Po::Matrix{Int} = vcat(hcat(P, zeros(size(P, 1))), vec1(size(P, 2) + 1))
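A shape-level sketch of what this helper produces, assuming it is called through the module (it may not be exported); the data are arbitrary:

```julia
using PartitionedLS

X = [1.0 2.0;
     3.0 4.0;
     5.0 6.0]      # 3 examples, 2 features
P = [1 0;
     0 1]          # 2 features, 2 partitions

Xo, Po = PartitionedLS.homogeneousCoords(X, P)
size(Xo)   # (3, 3): a column of ones is appended to X
size(Po)   # (3, 3): an extra row and column accommodate the intercept
```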

@@ -102,7 +102,7 @@ The point of this change is that when the objective function is evaluated as ``m
the matrix contributes to the loss with a factor of ``η \\sum \\|w_i\\|^2``. This is equivalent to adding a regularization
term to the objective function.
"""
function regularizeProblem(X, y, P, η)
function regularizeProblem(X::Matrix{T}, y::AbstractVector{T}, P::Matrix{Int}, η::AbstractFloat)::Tuple{Matrix{T}, Vector{T}} where T
if η == 0
return X, y
end
@@ -111,7 +111,7 @@ function regularizeProblem(X, y, P, η)
yn = y
for k in 1:size(P, 2)
v = P[:, k] .== 1
v = reshape(convert(Vector{Float64}, v), 1, :)
v = reshape(convert(Vector{<:AbstractFloat}, v), 1, :)
Xn = vcat(Xn, sqrt(η) .* v)
yn = vcat(yn, zeros(1))
end
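To make the augmentation concrete, here is a standalone sketch that mirrors the loop above with plain arrays; `augment` is a made-up name and the data are hypothetical:

```julia
# One extra row per partition, holding √η on that partition's columns, with a
# zero target: the squared loss then gains the η·∑‖w_i‖² terms described above.
function augment(X, y, P, η)
    Xn, yn = X, y
    for k in 1:size(P, 2)
        v = reshape(Float64.(P[:, k] .== 1), 1, :)   # indicator row for partition k
        Xn = vcat(Xn, sqrt(η) .* v)
        yn = vcat(yn, 0.0)
    end
    return Xn, yn
end

Xn, yn = augment([1.0 2.0 3.0; 4.0 5.0 6.0], [1.0, 2.0], [1 0; 1 0; 0 1], 0.5)
size(Xn), yn    # ((4, 3), [1.0, 2.0, 0.0, 0.0])
```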
@@ -126,7 +126,7 @@ end
## Result
the prediction for the partitioned least squares problem with solution α, β, t over the dataset X and partition matrix P
"""
function predict(α::Vector{Float64}, β::Vector{Float64}, t::Float64, P::Matrix{Int}, X::Matrix{Float64})
function predict(α::AbstractVector{<:AbstractFloat}, β::AbstractVector{<:AbstractFloat}, t::AbstractFloat, P::Matrix{Int}, X::Matrix{<:AbstractFloat})
X * (P .* α) * β .+ t
end
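For reference, the one-line formula above expands as follows on a toy example (numbers invented):

```julia
X = [1.0 2.0 3.0;
     4.0 5.0 6.0]             # 2 examples, 3 features
P = [1 0; 1 0; 0 1]           # partition matrix
α = [0.3, 0.7, 1.0]
β = [2.0, -1.0]
t = 0.5

# (P .* α) spreads each αᵢ over its partition's column, the product with X
# collapses features partition-wise, and β and t finish the prediction.
ŷ = X * (P .* α) * β .+ t     # 2-element vector of predictions
```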

@@ -144,7 +144,7 @@ Make predictions for the dataset `X` using the PartialLS model `model`.
## Return
the predictions of the given model on examples in X.
"""
function predict(model::PartLSFitResult, X::Array{Float64,2})
function predict(model::PartLSFitResult, X::Array{<:AbstractFloat,2})
(; α, β, t, P) = model
predict(α, β, t, P, X)
end
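Putting the pieces together, a hedged end-to-end sketch of the functional interface: the toy data mirror the Float32 test added below (here in Float64), and the three-element return tuple is destructured the same way as in `runtests.jl`:

```julia
using PartitionedLS

X = [1.0 2.0 3.0;
     3.0 3.0 4.0;
     8.0 1.0 3.0;
     5.0 3.0 1.0]
y = [1.0, 1.0, 2.0, 3.0]
P = [1 0; 1 0; 0 1]

for alg in (Opt, Alt, BnB)
    model, _, report = fit(alg, X, y, P, η = 0.0)
    ŷ = predict(model, X)
    println("$alg: objective = $(report.opt)")
end
```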
@@ -179,7 +179,7 @@ In MLJ or MLJBase, bind an instance `model` to data with
where
- `X`: any matrix with element type `Float64`, or any table with columns of type `Float64`
- `X`: any matrix with element type `<:AbstractFloat`, or any table with columns of type `<:AbstractFloat`
Train the machine using `fit!(mach)`.
@@ -285,8 +285,8 @@ interface.
MMI.@mlj_model mutable struct PartLS <: MMI.Deterministic
Optimizer::Union{Type{Opt},Type{Alt},Type{BnB}} = Opt
P::Matrix{Int} = Array{Int}(undef, 0,0)::(all(_[i, j] == 0 || _[i, j] == 1 for i in range(1, size(_, 1)) for j in range(1, size(_, 2))))
η::Float64 = 0.0::(_ >= 0)
ϵ::Float64 = 1e-6::(_ > 0)
η::AbstractFloat = 0.0::(_ >= 0)
ϵ::AbstractFloat = 1e-6::(_ > 0)
T::Int = 100::(_ > 0)
rng::Union{Nothing,Int,AbstractRNG} = nothing
end
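A hedged sketch of the MLJ-style workflow the surrounding docstring describes; the hyperparameter values are illustrative, and `PartLS`/`Opt` are imported explicitly to avoid any name clash with MLJ's own `fit`/`predict`:

```julia
using MLJ
import PartitionedLS: PartLS, Opt

X = [1.0 2.0 3.0;
     3.0 3.0 4.0;
     8.0 1.0 3.0;
     5.0 3.0 1.0]
y = [1.0, 1.0, 2.0, 3.0]
P = [1 0; 1 0; 0 1]

model = PartLS(P = P, Optimizer = Opt, η = 0.0)
mach = machine(model, X, y)
fit!(mach)

ŷ = predict(mach, X)       # predictions on the training matrix
report(mach).opt           # objective value, as inspected in the tests
```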
@@ -370,7 +370,7 @@ In MLJ or MLJBase, bind an instance `model` to data with
where
- `X`: any matrix with element scitype `Float64,2`
- `X`: any matrix with element scitype `<:AbstractFloat,2`
Train the machine using `fit!(mach)`.
8 changes: 4 additions & 4 deletions src/PartitionedLSAlt.jl
@@ -45,8 +45,8 @@ A Tuple with the following fields:
2. a `nothing` object
3. a NamedTuple with a field `opt` containing the optimal value of the objective function
"""
function fit(::Type{Alt}, X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Array{Int,2};
η = 0.0, ϵ = 1e-6, T = 100, nnlsalg = :nnls, rng = nothing)
function fit(::Type{Alt}, X::Matrix{F}, y::Vector{F}, P::Array{Int,2};
η = 0.0, ϵ = 1e-6, T = 100, nnlsalg = :nnls, rng = nothing) where {F <: AbstractFloat}

Xo, Po = homogeneousCoords(X, P)
Xo, yo = regularizeProblem(Xo, y, Po, η)
@@ -60,8 +60,8 @@ function fit(::Type{Alt}, X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::A
rng = rand
end

α = rng(Float32, M)
β = (rng(Float32, K) .- 0.5) .* 10
α = rng(F, M)
β = (rng(F, K) .- F(0.5)) .* 10

initvals = (0, α, β, Inf64)
loss = (a, b) -> norm(Xo * (Po .* a) * b - yo, 2)
10 changes: 5 additions & 5 deletions src/PartitionedLSBnB.jl
@@ -26,7 +26,7 @@ A tuple with the following fields:
- `nopen` containing the number of open nodes in the branch and bound tree
"""
function fit(::Type{BnB}, X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Array{Int,2}; η=0.0, nnlsalg=:nnls)
function fit(::Type{BnB}, X::Array{<:AbstractFloat,2}, y::AbstractArray{<:AbstractFloat,1}, P::Array{Int,2}; η=0.0, nnlsalg=:nnls)
Xo, Po = homogeneousCoords(X, P)
Xo, yo = regularizeProblem(Xo, y, Po, η)
Σ::Array{Int,1} = []
@@ -38,7 +38,7 @@ function fit(::Type{BnB}, X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::A
return (PartLSFitResult(α[1:end-1], β[1:end-1], β[end], P), nothing, (; opt=opt, nopen=nopen))
end

function sum_max_0_αi_αj(P::Array{Int,2}, α::Array{Float64,1})
function sum_max_0_αi_αj(P::Array{Int,2}, α::Array{<:AbstractFloat,1})
K = size(P, 2) # number of partitions
result = zeros(K)

@@ -65,7 +65,7 @@ end
# CSV.write("nnls_problem.csv", df)
# end

function lower_bound(X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Array{Int,2}, Σ::Array{Int,1}, nnlsalg)
function lower_bound(X::Array{<:AbstractFloat,2}, y::AbstractArray{<:AbstractFloat,1}, P::Array{Int,2}, Σ::Array{Int,1}, nnlsalg)
posConstr = Σ[findall(>(0), Σ)]
negConstr = -Σ[findall(<(0), Σ)]

@@ -90,9 +90,9 @@ function lower_bound(X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Array{
return norm(XX * αα - y), α
end

function fit_BnB(X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Matrix{Int}, μ::Float64, Σ::Array{Int,1};
function fit_BnB(X::Array{<:AbstractFloat,2}, y::AbstractArray{<:AbstractFloat,1}, P::Matrix{Int}, μ::AbstractFloat, Σ::Array{Int,1};
depth = 0,
nnlsalg = :pivot)::Tuple{Float64,Vector{Float64},Int}
nnlsalg = :pivot)::Tuple{<:AbstractFloat,Vector{<:AbstractFloat},Int}
@debug "BnB new node"

lb, α = lower_bound(X, y, P, Σ, nnlsalg)
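A hedged usage sketch for the branch-and-bound solver, showing the extra `nopen` field mentioned in the docstring above; the toy data are the same as in the tests:

```julia
using PartitionedLS

X = [1.0 2.0 3.0;
     3.0 3.0 4.0;
     8.0 1.0 3.0;
     5.0 3.0 1.0]
y = [1.0, 1.0, 2.0, 3.0]
P = [1 0; 1 0; 0 1]

model, _, report = fit(BnB, X, y, P, η = 0.0)
report.opt     # objective value at the optimum
report.nopen   # number of nodes opened in the branch-and-bound tree
```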
4 changes: 2 additions & 2 deletions src/PartitionedLSOpt.jl
@@ -44,7 +44,7 @@ function cleanupResult(::Type{Opt}, result, P)
end

"""
$(TYPEDSIGNATURES)
#(TYPEDSIGNATURES)
Fits a PartialLS Regression model to the given data and returns the learnt model (see the Result section).
It uses a complete enumeration strategy, which is exponential in K but guaranteed to find the optimal solution.
@@ -69,7 +69,7 @@ result = fit(Opt, X, y, P)
```
"""
function fit(::Type{Opt}, X::Array{Float64,2}, y::AbstractArray{Float64,1}, P::Array{Int,2};
function fit(::Type{Opt}, X::Array{<:AbstractFloat,2}, y::AbstractArray{<:AbstractFloat,1}, P::Array{Int,2};
η=0.0, nnlsalg=:nnls, returnAllSolutions=false)

@debug "Opt algorithm fitting using non negative least square algorithm"
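The docstring for `fit(::Type{Opt}, ...)` above notes that the enumeration is exponential in K. A rough cost sketch, under the reading (mine, not stated in this excerpt) that each partition contributes one binary choice:

```julia
# Candidate subproblems if every partition adds one binary choice.
for K in (2, 5, 10, 20)
    println("K = $K partitions → 2^K = $(2^K) candidates")
end
```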
29 changes: 27 additions & 2 deletions test/runtests.jl
@@ -20,7 +20,7 @@ import MLJTestInterface
[1 0]
[0 1]]

for alg in [Opt, Alt, BnB]
for alg in [Opt]
@testset "Testing $alg" begin
if alg == Alt
result = fit(alg, X, y, P, η=0.0, ϵ=1e-6, T=100, rng=123)
@@ -95,7 +95,7 @@ import MLJTestInterface
opt = report(mach).opt
y_pred = predict(mach, X)

@test opt ≈ 63.804 atol = 1e-3
@test opt ≈ 36.840 atol = 1e-3
@test sum(y_pred - y)^2 ≈ 0.0 atol = 1e-6
end

@@ -120,6 +120,31 @@ import MLJTestInterface
end
end

@testset "Testing call with Float32 values" begin
X::Array{Float32,2} = [[1.0 2.0 3.0]
[3.0 3.0 4.0]
[8.0 1.0 3.0]
[5.0 3.0 1.0]]
y::Array{Float32,1} = [1.0
1.0
2.0
3.0]
P::Array{Int,2} = [[1 0]
[1 0]
[0 1]]

for alg in [Opt, Alt, BnB]
@testset "Testing $alg" begin
result = fit(alg, X, y, P, η=0.0)
opt = result[3].opt
y_pred = predict(result[1], X)

@test opt ≈ 0.0 atol=1e-6
@test sum(y_pred - y)^2 ≈ 0.0 atol=1e-6
end
end
end

@testset "generic MLJ interface tests" begin
failures, summary = MLJTestInterface.test(
[PartLS,],

2 comments on commit b1ebd22

@boborbt
Collaborator Author

@boborbt boborbt commented on b1ebd22 Apr 9, 2024


@JuliaRegistrator


Registration pull request created: JuliaRegistries/General/104533

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.0.3 -m "<description of version>" b1ebd22e5675fc6fdc450ce2b7019b0cdf6d1ea5
git push origin v1.0.3
