
Basic Introduction to Machine Learning: 01-overview

This page was generated from a single Julia file: 01-overview.jl.

In any such Julia documentation, you can access the source code using the "Edit on GitHub" link in the top right.

The corresponding notebook can be viewed in nbviewer here: 01-overview.ipynb, and opened in binder here: 01-overview.ipynb.

Setup

Packages needed here.

using LinearAlgebra: norm
using Random: seed!
using LaTeXStrings # pretty plot labels
using Plots: plot, plot!, scatter, scatter!, surface!, default, font, gui
using MIRTjim: jim, prompt
using InteractiveUtils: versioninfo

default(markersize=5, markerstrokecolor=:auto, label="")
fnt = font("DejaVu Sans", 15) # larger default font
default(guidefont=fnt, xtickfont=fnt, ytickfont=fnt, legendfont=fnt)
default(tickfontsize=10, legendfontsize=11)

The following line is helpful when running this file as a script; it will prompt the user to hit a key after each figure is displayed.

isinteractive() ? jim(:prompt, true) : prompt(:draw);

Supervised learning: classification

seed!(0)
n1 = 50; n2 = n1
rot = phi -> [cos(phi) sin(-phi); sin(phi) cos(phi)]
data1 = rot(π/8) * ([3 0; 0 1] * randn(2,n1) .+ [8;2])
data2 = rot(π/4) * ([2 0; 0 1] * randn(2,n2) .+ [9;3])
scatter(data1[1,:], data1[2,:], color=:blue, label="class1")
scatter!(data2[1,:], data2[2,:], color=:red, label="class2")
plot!(xlabel=L"x_1", ylabel=L"x_2")
plot!(xlim=(0,14), ylim=(0,14))
plot!(aspect_ratio=1, xtick=[0, 14], ytick=[0, 14])
x = LinRange(0,14,101)
y = 2 .+ x - 0.03 * x.^2 # add decision boundary
plot!(x, y, color=:magenta)
prompt()
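
A quick added check (not part of the original demo) of how well that hand-picked quadratic boundary separates the two classes: count the points of each class lying below the curve. For this draw, class 1 should fall mostly below and class 2 mostly above.

boundary = x -> 2 + x - 0.03 * x^2 # same curve as plotted above
below = data -> count(data[2,:] .< boundary.(data[1,:])) # points under the curve
below(data1) / n1, below(data2) / n2 # expect fractions near 1 and near 0, respectively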

Supervised learning: regression

seed!(0)
N = 40
f = (x) -> 10. / (x + 1)
xt = 10 * rand(N)
yt = f.(xt) + 0.4 * randn(N)
x = range(0, 10, 101)
y = f.(x)
scatter(xt, yt, color=:blue, label="training data")
plot!(xlabel=L"x", ylabel=L"y")
plot!(xlim=(0,10), ylim=(0,8))
plot!(xtick=0:5:10, ytick=0:4:8)

Polynomial regression model

Afun = (tt) -> [t.^i for t in tt, i in 0:3] # matrix of monomials
A = Afun(xt)
coef = A \ yt
y = Afun(x) * coef
plot!(x, y, line=:magenta, label="cubic regression")
prompt()
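
An added aside: A \ yt returns the least-squares solution (via a QR factorization for this tall matrix). As a sanity check, the normal-equation solution sketched below should agree closely for this modest cubic fit.

coef_ne = (A' * A) \ (A' * yt) # normal equations: A'A c = A'y
norm(coef - coef_ne) / norm(coef) # relative difference; expected to be tiny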

Unsupervised learning: data

seed!(0)
n1 = 50; n2 = n1; n3=n1
rot = phi -> [cos(phi) sin(-phi); sin(phi) cos(phi)]
data1 = rot(π/4) * ([2 0; 0 0.7] * randn(2,n1) .+ [9;3])
data2 = rot(π/8) * ([3 0; 0 0.6] * randn(2,n2) .+ [8;2])
data3 = rot( 0 ) * ([2 0; 0 0.5] * randn(2,n3) .+ [9;1]);

plot(xlabel = L"x_1", ylabel = L"x_2")
scatter!(data1[1,:], data1[2,:], color=:black, label="training data")
scatter!(data2[1,:], data2[2,:], color=:black)
scatter!(data3[1,:], data3[2,:], color=:black)
plot!(xlim=(0,14), ylim=(0,14))
plot!(aspect_ratio=1, xtick=[0, 14], ytick=[0, 14])
prompt()

Clustering (oracle)

plot(xlabel = L"x_1", ylabel = L"x_2")
scatter!(data1[1,:], data1[2,:], color=:blue, label="cluster1")
scatter!(data2[1,:], data2[2,:], color=:red, label="cluster2")
scatter!(data3[1,:], data3[2,:], color=:orange, label="cluster3")
plot!(xlim=(0,14), ylim=(0,14))
plot!(aspect_ratio=1, xtick=[0, 14], ytick=[0, 14])
prompt()
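
For contrast with these oracle labels, below is a minimal k-means sketch (an addition, not part of the original demo). The initialization and the 20-iteration budget are arbitrary choices, so the recovered clusters may not match the oracle grouping exactly.

Xc = [data1 data2 data3] # 2 × 150 matrix of all points, oracle labels set aside
K = 3
centers = Xc[:, [1, 51, 101]] # simple initialization: one point from each generating block
labels = zeros(Int, size(Xc, 2))
for iter in 1:20
    for j in axes(Xc, 2) # assignment step: nearest current center
        labels[j] = argmin([norm(Xc[:,j] - centers[:,k]) for k in 1:K])
    end
    for k in 1:K # update step: each center becomes the mean of its assigned points
        idx = findall(==(k), labels)
        isempty(idx) || (centers[:,k] = vec(sum(Xc[:,idx], dims=2)) / length(idx))
    end
end
[count(==(k), labels) for k in 1:K] # cluster sizes; ideally near [50, 50, 50]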

Novelty detection

plot(xlabel=L"x_1", ylabel=L"x_2")
scatter!(data1[1,:], data1[2,:], color=:black)
scatter!(data2[1,:], data2[2,:], color=:black)
scatter!(data3[1,:], data3[2,:], color=:black)
scatter!([10], [11], color=:red)
plot!(xlim=(0,14), ylim=(0,14))
plot!(aspect_ratio=1, xtick=[0, 14], ytick=[0, 14])
prompt()
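
One simple way to flag such a point (an assumed approach, not from the original demo) is to use its distance to the nearest training sample as a novelty score.

Xref = [data1 data2 data3] # all training points
novelty = x -> minimum(norm(x - Xref[:,j]) for j in axes(Xref, 2)) # nearest-neighbor distance
novelty([10, 11]), novelty([9, 1]) # the marked point vs. a point near a cluster center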

The utility of nonlinearity

1D example of supervised learning: classification

seed!(0)
n1 = 20; n2 = n1; n3 = n1
data1 = 1 * randn(2,n1) .+ 5
data2 = 1 * randn(2,n2) .+ 0
data3 = 1 * randn(2,n3) .+ (-5)
plot(xlabel=L"x_1", ylabel="")
scatter!(data1[1,:], zeros(n1), color=:blue, label="class1")
scatter!(data2[1,:], zeros(n2), color=:red, label="class2")
scatter!(data3[1,:], zeros(n3), color=:blue)
plot!(xlim=(-8,7), ylim=(-1,1))
plot!(xtick=-6:3:6, ytick=[])
plot!([1, 1]*2, [-1, 1], color=:orange)
prompt()

A simple nonlinearity, abs(feature), allows linear separation

f = x -> abs(x)
data1[2,:] = f.(data1[1,:])
data2[2,:] = f.(data2[1,:])
data3[2,:] = f.(data3[1,:])
plot(xlabel=L"x_1", ylabel=L"x_2")
scatter!(data1[1,:], data1[2,:], color=:blue, label="class1")
scatter!(data2[1,:], data2[2,:], color=:red, label="class2")
scatter!(data3[1,:], data3[2,:], color=:blue)
plot!(xlim=(-8,7), ylim=(-1,10))
plot!(xtick=-6:3:6, ytick=0:5:10)
plot!([-1, 1]*8, [1, 1]*2.4, color=:orange, width=2, legend=:top)
prompt()
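
A quick added count confirms how well the single threshold drawn in orange separates the lifted features; the 2.4 value below simply matches that line.

thresh = 2.4 # same height as the orange line above
nright = count(>(thresh), data1[2,:]) + count(>(thresh), data3[2,:]) + # class 1 above
    count(<(thresh), data2[2,:]) # class 2 below
nright / (n1 + n2 + n3) # fraction on the correct side; should be at or near 1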

2D example

seed!(0)
n1 = 40; n2 = 120
data1 = randn(2,n1)
rad2 = 3 .+ 3*rand(1,n2)
ang2 = rand(1,n2) * 2π
data2 = [rad2 .* cos.(ang2); rad2 .* sin.(ang2)]
plot(xlabel=L"x_1", ylabel=L"x_2")
scatter!(data1[1,:], data1[2,:], color=:blue, label="class1")
scatter!(data2[1,:], data2[2,:], color=:red, label="class2")
plot!(xlim=[-1,1]*6, ylim=[-1,1]*6)
plot!(aspect_ratio=1, xtick=-6:6:6, ytick=-6:6:6)
prompt()

plot!([0, 1, 0, -1, 0]*3, [-1, 0, 1, 0, -1]*3, color=:orange, width=2)
prompt()

Nonlinear lifting into 3D

lift_fun = (x) -> sum(abs.(x), dims=1)
lift1 = [data1; lift_fun(data1)]
lift2 = [data2; lift_fun(data2)]
plot(xlabel=L"x_1", ylabel=L"x_2", zlabel=L"$x_3 = |x_1| + |x_2|$")
scatter!(lift1[1,:], lift1[2,:], lift1[3,:], color=:blue, label="class1")
scatter!(lift2[1,:], lift2[2,:], lift2[3,:], color=:red, label="class2")
plot!(xlim=[-1,1]*6, ylim=[-1,1]*6)
plot!(xtick=-6:6:6, ytick=-6:6:6)
plot!(camera=(30,12))
#savefig("ml-nonlin2d-lift.pdf")
prompt()


xc = -6:6
yc = -6:6
z = 3 * ones(length(xc), length(yc)) # 3 chosen manually
surface!(xc, yc, z, colorbar=nothing, alpha=0.6) #, color=:orange)
prompt()
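
As in the 1D case, a quick added count shows the plane at x3 = 3 separates nearly all of the lifted points:

( count(<(3), lift_fun(data1)) + count(>(3), lift_fun(data2)) ) / (n1 + n2) # fraction correct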

Nonlinearity in regression

seed!(0)
N = 40
f = (x) -> 10. / (x + 1)
xt = 10 * rand(N)
yt = f.(xt) + 0.4 * randn(N)
x = range(0, 10, 101)
y = f.(x)
scatter(xt, yt, color=:blue, label="training data for regression")
plot!(xlabel=L"x", ylabel=L"y")
plot!(xlim=(0,10), ylim=(0,8))
plot!(xtick=0:5:10, ytick=0:4:8)

Afun = (tt,deg) -> [t.^i for t in tt, i in 0:deg] # matrix of monomials
A3 = Afun(xt,3)
coef3 = A3 \ yt
y3 = Afun(x,3) * coef3;

A1 = Afun(xt,1)
coef1 = A1 \ yt
y1 = Afun(x,1) * coef1;

plot!(x, y3, line=:magenta,
    label = L"\mathrm{cubic:\ } y = \alpha_3 x^3 + \alpha_2 x^2 + \alpha_1 x + \alpha_0")
plot!(x, y1, line=(:dash,:red),
    label = L"\mathrm{linear\ (affine):\ } y = \alpha_1 x + \alpha_0")
prompt()
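
Comparing the training residuals (a rough added check, not a test-set evaluation) quantifies how much better the cubic model fits this nonlinear function:

norm(A3 * coef3 - yt), norm(A1 * coef1 - yt) # the cubic residual is smaller than the affine one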

Linear discriminant analysis (LDA)


seed!(0)
n1 = 70; n2 = n1
rot = phi -> [cos(phi) sin(-phi); sin(phi) cos(phi)]
mu1 = [7, 10]
mu2 = [9, 4]
S1 = rot(π/9) * [3 0; 0 1]
S2 = S1 # for LDA
data1 = S1 * randn(2,n1) .+ mu1
data2 = S2 * randn(2,n2) .+ mu2
plot(xlabel=L"x_1", ylabel=L"x_2")
scatter!(data1[1,:], data1[2,:], color=:blue, label="class1")
scatter!(data2[1,:], data2[2,:], color=:red, label="class2")
plot!(xlim=(0,16), ylim=(0,16))
plot!(aspect_ratio=1)
plot!(xtick=0:4:16, ytick=0:4:16)

ϕ = range(0,2π,101)
for r in [1.5 2.5]
    local x = r * cos.(ϕ)
    local y = r * sin.(ϕ)
    c1 = S1 * [x'; y'] .+ mu1
    c2 = S2 * [x'; y'] .+ mu2
    plot!(c1[1,:], c1[2,:], color=:blue)
    plot!(c2[1,:], c2[2,:], color=:red)
end
x = range(-1,17,11)
w = (S1 * S1') \ (mu2 - mu1) # LDA
c = (norm(S1 \ mu2)^2 - norm(S1 \ mu1)^2)/2
y = (c .- w[1] * x) / (w[2])
plot!(x, y, color=:magenta, width=2, legend=:topleft)
prompt()
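
Here w solves (S1*S1') w = mu2 - mu1, i.e., it is the LDA weight vector Σ⁻¹(μ₂ − μ₁) for shared covariance Σ = S1*S1', and the magenta line is the boundary where w'x = c. An added sanity check of that rule on the training samples:

class2hat1 = vec(w' * data1) .> c # rule: w'x > c ⟹ class 2; should be mostly false for data1
class2hat2 = vec(w' * data2) .> c # should be mostly true for data2
(count(!, class2hat1) + count(class2hat2)) / (n1 + n2) # training accuracy; at or near 1 here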

Model-order selection

Training and test data for sinusoidal regression

seed!(0)
Ntrain = 40
Ntest = 30
f = (x) -> 10. / (x + 1)
xtrain = 10 * rand(Ntrain)
ytrain = f.(xtrain) + 0.4 * randn(Ntrain)
xtest = 10 * rand(Ntest)
ytest = f.(xtest) + 0.4 * randn(Ntest)

x = range(0,10,201)
y = f.(x)

plot(xlabel=L"x", ylabel=L"y")
scatter!(xtrain, ytrain, color=:blue, label="training data")
scatter!(xtest, ytest, color=:red, label="test data")
plot!(xlim=(0,10), ylim=(0,8))
plot!(xtick=0:5:10, ytick=0:4:8)
prompt()

Show overfit

scatter(xtrain, ytrain, color=:blue, label="training data")
plot!(xlim=(0,10), ylim=(0,8))
plot!(xtick=0:5:10, ytick=0:4:8)

Afun = (tt,deg) -> [t.^i for t in tt, i in 0:deg] # matrix of monomials (immediately replaced below)
Afun = (tt,deg) -> [cos(2π*t*i/20) for t in tt, i in 0:deg] # matrix of sinusoids (used hereafter)
dlist = [2 9 20]
clist = (:magenta, :red, :orange)
for ii in 1:length(dlist)
    local deg = dlist[ii]
    local A = Afun(xtrain,deg)
    local coef = A \ ytrain
    local y = Afun(x,deg) * coef
    plot!(x, y, line=clist[ii], width=2, label="$deg harmonics")
end
plot!(xlabel=L"x", ylabel=L"y")
prompt()

Training fit improves with more harmonics, of course, but the fit to test data eventually degrades

dlist = 0:30
etrain = zeros(length(dlist))
etest = zeros(length(dlist))
errs = zeros(length(dlist))
for ii in 1:length(dlist)
    deg = dlist[ii]
    Atrain = Afun(xtrain, deg)
    Atest = Afun(xtest, deg)
    # @show cond(A'*A) # sinusoids are more stable than polynomials
    local coef = Atrain \ ytrain
    yh = Atrain * coef
    etrain[ii] = norm(yh - ytrain)
    etest[ii] = norm(Atest * coef - ytest)
    errs[ii] = norm(yh - f.(xtrain)) # error vs. noiseless values at the training points
end
scatter(dlist, etrain, color=:blue, label="fit to training data")
plot!(xlabel = "model order: # of sinusoids")
plot!(ylabel = L"\mathrm{fit:\ } ‖ \hat{y} - y ‖_2")
plot!(ylim=[0,13], ytick=[0,13])
scatter!(dlist, etest, color=:red, label="fit to test data")
prompt()
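
The commented-out cond(A'*A) line above hints at why a sinusoidal basis is used here. A quick added comparison of design-matrix conditioning (order 9 is an arbitrary choice):

using LinearAlgebra: cond
Apoly = [t^i for t in xtrain, i in 0:9] # degree-9 monomial basis (the replaced Afun)
Asinu = Afun(xtrain, 9) # 10-term sinusoidal basis used above
cond(Apoly' * Apoly), cond(Asinu' * Asinu) # the monomial Gram matrix is far more ill-conditioned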

Cross-validation

Nlearn = Int(Ntrain / 2)
Nvalid = Ntrain - Nlearn
xlearn = xtrain[1:Nlearn]
ylearn = ytrain[1:Nlearn]
xvalid = xtrain[(Nlearn+1):Ntrain]
yvalid = ytrain[(Nlearn+1):Ntrain]

plot(xlabel=L"x", ylabel=L"y")
scatter!(xlearn, ylearn, color=:blue, label="training data (fitting)")
scatter!(xvalid, yvalid, color=:cyan, label="validation data (model selection)")
plot!(xlim=(0,10), ylim=(0,8))
plot!(xtick=0:5:10, ytick=0:4:8)
prompt()

Fit to the learning set improves with more harmonics, of course, but the validation error indicates where to stop

dlist = 0:20
elearn = zeros(length(dlist))
evalid = zeros(length(dlist))
etest = zeros(length(dlist))
errs = zeros(length(dlist))
for ii in 1:length(dlist)
    deg = dlist[ii]
    Alearn = Afun(xlearn, deg)
    Avalid = Afun(xvalid, deg)
    Atest = Afun(xtest, deg)
    # @show cond(A'*A) # sinusoids are more stable than polynomials
    local coef = Alearn \ ylearn
    elearn[ii] = norm(Alearn * coef - ylearn)
    evalid[ii] = norm(Avalid * coef - yvalid)
    etest[ii] = norm(Atest * coef - ytest)
    errs[ii] = norm([Alearn; Avalid]*coef - f.([xlearn; xvalid]))
end
scatter(dlist, elearn, color=:blue, label="fit to training data")
scatter!(dlist, evalid, color=:cyan, label="fit to validation data")
plot!(xlabel = "model order: # of sinusoids")
plot!(ylabel = L"fit: \ ‖ \hat{y} - y ‖_2")
plot!(ylim=[0,13], ytick=[0,13])
dbest = findall(diff(evalid) .>= 0)[1] # find first increase in validation error
plot!(xtick=[0, dlist[dbest], 20])
scatter!(dlist, etest, color=:red, label="fit to test data")
prompt()
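
The rule above picks the order just before the first increase in validation error; an equally common alternative (noted here as an aside) is simply the order with the smallest validation error:

dlist[argmin(evalid)] # model order minimizing the validation error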

Reproducibility

This page was generated with the following version of Julia:

io = IOBuffer(); versioninfo(io); split(String(take!(io)), '\n')
11-element Vector{SubString{String}}:
 "Julia Version 1.11.6"
 "Commit 9615af0f269 (2025-07-09 12:58 UTC)"
 "Build Info:"
 "  Official https://julialang.org/ release"
 "Platform Info:"
 "  OS: Linux (x86_64-linux-gnu)"
 "  CPU: 4 × AMD EPYC 7763 64-Core Processor"
 "  WORD_SIZE: 64"
 "  LLVM: libLLVM-16.0.6 (ORCJIT, znver3)"
 "Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)"
 ""

And with the following package versions:

import Pkg; Pkg.status()
Status `~/work/ismrm_ml2/ismrm_ml2/docs/Project.toml`
  [31c24e10] Distributions v0.25.120
  [e30172f5] Documenter v1.14.1
  [587475ba] Flux v0.16.5
  [b964fa9f] LaTeXStrings v1.4.0
  [98b081ad] Literate v2.20.1
  [170b2178] MIRTjim v0.25.0
  [eb30cadb] MLDatasets v0.7.18
  [91a5bcdd] Plots v1.40.18
  [2913bbd2] StatsBase v0.34.6
  [1986cc42] Unitful v1.24.0
  [ef84fa70] ismrm_ml2 v0.0.1 `~/work/ismrm_ml2/ismrm_ml2`
  [b77e0a4c] InteractiveUtils v1.11.0
  [37e2e46d] LinearAlgebra v1.11.0
  [9a3f8284] Random v1.11.0

This page was generated using Literate.jl.