MNIST
TLDR
$ cd ~/.julia/v0.4/Latte/examples/mnist/data
$ ./get-data.sh
$ cd ..
$ julia mnist.jl
Preparing the data
These steps can be performed automatically by running the get-data.sh script inside the examples/mnist/data directory.
First we will download the MNIST dataset from Yann LeCun's website.
TARGET_DIR=$(pwd)
for dset in train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz \
            t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
do
    curl -o $TARGET_DIR/$dset -O http://yann.lecun.com/exdb/mnist/$dset
    STEM=$(basename "${dset}" .gz)
    gunzip -c $TARGET_DIR/$dset > $TARGET_DIR/$STEM
done
Next we will convert the binary data into HDF5 datasets. This code is contained in convert.jl.

This begins by loading the HDF5 package and opening the binary files with Julia's open function.
using HDF5

base_dir = "./"
datasets = Dict("train" => ["$base_dir/train-labels-idx1-ubyte",
                            "$base_dir/train-images-idx3-ubyte"],
                "test"  => ["$base_dir/t10k-labels-idx1-ubyte",
                            "$base_dir/t10k-images-idx3-ubyte"])
for key in keys(datasets)
    label_fn, data_fn = datasets[key]
    label_f = open(label_fn)
    data_f = open(data_fn)
Next we read the headers of the binary files, which tell us the dimensions of the dataset.
label_header = read(label_f, Int32, 2)
@assert ntoh(label_header[1]) == 2049
n_label = round(Int, ntoh(label_header[2]))
data_header = read(data_f, Int32, 4)
@assert ntoh(data_header[1]) == 2051
n_data = round(Int, ntoh(data_header[2]))
@assert n_label == n_data
h = round(Int, ntoh(data_header[3]))
w = round(Int, ntoh(data_header[4]))
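The magic numbers come from the IDX file format: 2049 (0x00000801) marks a label file and 2051 (0x00000803) an image file, and every header field is stored big-endian, which is why each value passes through ntoh. As a quick standalone sanity check of the byte swapping (not part of convert.jl, and assuming a little-endian machine):

# The first four bytes of an IDX image file are 0x00 0x00 0x08 0x03.
# Read natively on little-endian hardware they become 0x03080000;
# ntoh swaps them back to 0x00000803 == 2051.
raw = reinterpret(Int32, UInt8[0x00, 0x00, 0x08, 0x03])[1]
@assert ntoh(raw) == 2051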
Next we open an HDF5 file for writing and initialize two datasets (label and data).
println("Exporting $n_data digits of size $h x $w") h5open("$base_dir/$key.hdf5", "w") do h5 dset_data = d_create(h5, "data", datatype(Float32), dataspace(w, h, 1, n_data)) dset_label = d_create(h5, "label", datatype(Float32), dataspace(1, n_data))
Then we read the label and data bytes and convert them to Arrays of Float32. We normalize the data to values in the range [0, 1) by dividing by 256.
img = readbytes(data_f, n_data * h * w)
img = convert(Array{Float32}, img) / 256  # scale into [0, 1)
class = readbytes(label_f, n_data)
class = convert(Array{Float32}, class)
We permute the indices of the dataset so the examples are stored in a shuffled order. Then we iterate over the permuted indices, storing the data and label values into the HDF5 datasets.
idx = 1:n_data
println(" $idx...")
idx = collect(idx)
rp = randperm(length(idx))
for j = 1:length(idx)
    r_idx = rp[j]
    dset_data[:, :, 1, idx[j]] = img[(r_idx - 1) * h * w + 1 : r_idx * h * w]
    dset_label[1, idx[j]] = class[r_idx]
end
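The chunks above leave the h5open do-block and the outer loop over datasets open; to round out the walkthrough, here is a minimal sketch of the remaining cleanup (convert.jl's own closing code may differ slightly):

end  # closes the h5open do-block, flushing $key.hdf5 to disk
close(label_f)
close(data_f)
end  # closes the loop over "train" and "test"

Once the conversion has run, the resulting files can be spot-checked; a sketch assuming the standard MNIST sizes (size reports the dataset dimensions in Julia's column-major order):

using HDF5
h5open("train.hdf5", "r") do h5
    @assert size(h5["data"]) == (28, 28, 1, 60000)
    @assert size(h5["label"]) == (1, 60000)
end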
The Model
The model code can be found in examples/mnist/mnist.jl.
using Latte

net = Net(100)
data, label = HDF5DataLayer(net, "data/train.txt", "data/test.txt")
conv1    = ConvolutionLayer(:conv1, net, data, 20, 5, 1, 1)
relu1    = ReLULayer(:relu1, net, conv1)
pool1    = MaxPoolingLayer(:pool1, net, relu1, 2, 2, 0)
conv2    = ConvolutionLayer(:conv2, net, pool1, 50, 5, 1, 1)
relu2    = ReLULayer(:relu2, net, conv2)
pool2    = MaxPoolingLayer(:pool2, net, relu2, 2, 2, 0)
conv3    = ConvolutionLayer(:conv3, net, pool2, 50, 3, 1, 1)
relu3    = ReLULayer(:relu3, net, conv3)
pool3    = MaxPoolingLayer(:pool3, net, relu3, 2, 2, 0)
fc4      = InnerProductLayer(:fc4, net, pool3, 512)
relu4    = ReLULayer(:relu4, net, fc4)
fc5      = InnerProductLayer(:fc5, net, relu4, 512)
relu5    = ReLULayer(:relu5, net, fc5)
fc6      = InnerProductLayer(:fc6, net, relu5, 10)
loss     = SoftmaxLossLayer(:loss, net, fc6, label)
accuracy = AccuracyLayer(:accuracy, net, fc6, label)

params = SolverParameters(
    lr_policy = LRPolicy.Inv(0.01, 0.0001, 0.75),
    mom_policy = MomPolicy.Fixed(0.9),
    max_epoch = 50,
    regu_coef = .0005)
sgd = SGD(params)
solve(sgd, net)
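As a sanity check on the architecture, the spatial size can be traced through the stack with the usual output-size formula. This is an illustrative sketch, assuming Latte follows the common floor convention and that the trailing arguments of the convolution and pooling layers are (number of filters,) kernel size, stride, and padding; the helper below is hypothetical, not part of Latte:

# Hypothetical helper: out = floor((n + 2*pad - kernel) / stride) + 1
outdim(n, kernel, stride, pad) = div(n + 2 * pad - kernel, stride) + 1

s = 28                  # MNIST digits are 28x28
s = outdim(s, 5, 1, 1)  # conv1 -> 26
s = outdim(s, 2, 2, 0)  # pool1 -> 13
s = outdim(s, 5, 1, 1)  # conv2 -> 11
s = outdim(s, 2, 2, 0)  # pool2 -> 5
s = outdim(s, 3, 1, 1)  # conv3 -> 5
s = outdim(s, 2, 2, 0)  # pool3 -> 2
println("fc4 sees $(s * s * 50) inputs per image")  # 2 * 2 * 50 = 200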