Julia Neural Network code same speed as PyPy

Question

So I have some neural network code in Python which I rewrote in Julia. The straight Python code runs in about 7 seconds, while both the Julia and PyPy code run in about 0.75 seconds

sigmoid(z::Float64) = 1/(1 + exp(-z))
sigmoidPrime(z::Float64) = sigmoid(z) * (1 - sigmoid(z))

### Types ###

abstract AbstractNode

type Edge
    source::AbstractNode
    target::AbstractNode
    weight::Float64
    derivative::Float64
    augmented::Bool

    Edge(source::AbstractNode, target::AbstractNode) = new(source, target, randn(1,1)[1], 0.0, false)
end

type Node <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    activationPrime::Float64

    Node() = new([], [], -1.0, -1.0)
end

type InputNode <: AbstractNode
    index::Int
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    InputNode(index::Int) = new(index, [], [], -1.0)
end

type BiasNode <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    BiasNode() = new([], [], 1.0)
end

type Network
    inputNodes::Vector{InputNode}
    hiddenNodes::Vector{Node}
    outputNodes::Vector{Node}

    function Network(sizes::Array, bias::Bool=true)
        inputNodes = [InputNode(i) for i in 1:sizes[1]];
        hiddenNodes = [Node() for _ in 1:sizes[2]];
        outputNodes = [Node() for _ in 1:sizes[3]];

        for inputNode in inputNodes
            for node in hiddenNodes
                edge = Edge(inputNode, node);
                push!(inputNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        for node in hiddenNodes
            for outputNode in outputNodes
                edge = Edge(node, outputNode);
                push!(node.outgoingEdges, edge)
                push!(outputNode.incomingEdges, edge)
            end
        end

        if bias == true
            biasNode = BiasNode()
            for node in hiddenNodes
                edge = Edge(biasNode, node);
                push!(biasNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        new(inputNodes, hiddenNodes, outputNodes)
    end
end


### Methods ###

function evaluate(obj::Node, inputVector::Array)
    if obj.activation > -0.5
        return obj.activation
    else
        weightedSum = sum([d.weight * evaluate(d.source, inputVector) for d in obj.incomingEdges])
        obj.activation = sigmoid(weightedSum)
        obj.activationPrime = sigmoidPrime(weightedSum)

        return obj.activation
    end
end

function evaluate(obj::InputNode, inputVector::Array)
    obj.activation = inputVector[obj.index]
    return obj.activation
end

function evaluate(obj::BiasNode, inputVector::Array)
    obj.activation = 1.0
    return obj.activation
end

function updateWeights(obj::AbstractNode, learningRate::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.weight -= learningRate * d.derivative
            updateWeights(d.source, learningRate)
            d.derivative = 0.0
        end
    end
end

function compute(obj::Network, inputVector::Array)
    output = [evaluate(node, inputVector) for node in obj.outputNodes]
    for node in obj.outputNodes
        clear(node)
    end
    return output
end

function clear(obj::AbstractNode)
    for d in obj.incomingEdges
        obj.activation = -1.0
        obj.activationPrime = -1.0
        d.augmented = false
        clear(d.source)
    end
end

function propagateDerivatives(obj::AbstractNode, error::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.derivative += error * obj.activationPrime * d.source.activation
            propagateDerivatives(d.source, error * d.weight * obj.activationPrime)
        end
    end
end

function backpropagation(obj::Network, example::Array)
    output = [evaluate(node, example[1]) for node in obj.outputNodes]
    error = output - example[2]
    for (node, err) in zip(obj.outputNodes, error)
        propagateDerivatives(node, err)
    end

    for node in obj.outputNodes
        clear(node)
    end
end

function train(obj::Network, labeledExamples::Array, learningRate::Float64=0.7, iterations::Int=10000)
    for _ in 1:iterations
        for ex in labeledExamples
            backpropagation(obj, ex)
        end

        for node in obj.outputNodes
            updateWeights(node, learningRate)
        end

        for node in obj.outputNodes
            clear(node)
        end
    end
end


labeledExamples = Array[Array[[0,0,0], [0]],
                        Array[[0,0,1], [1]],
                        Array[[0,1,0], [0]],
                        Array[[0,1,1], [1]],
                        Array[[1,0,0], [0]],
                        Array[[1,0,1], [1]],
                        Array[[1,1,0], [1]],
                        Array[[1,1,1], [0]]];

neuralnetwork = Network([3,4,1])
@time train(neuralnetwork, labeledExamples)

I haven't provided the Python code because I'm not sure it's necessary (however I will if you really want it), I'm certainly not expecting one to spend lots of time making complete sense of this code, I'm just basically looking for glaring/systematic inefficiencies related to proper Julia implementation (as opposed to the algorithm itself).

My motivation for doing this is that designing a neural network this way is so much more natural than vectorizing the algorithm and using Numpy, but of course all that looping and jumping around the class structures is slow in Python.

Thus this seemed like a natural choice for porting to Julia and seeing if I couldn't get some major speed ups, and while an order of magnitude speed up over straight Python is cool, what I was really hoping for was an order of magnitude speed up over PyPy (some benchmarks I found online seemed to suggest this was a reasonable expectation).

Note: This has to be run in Julia 0.3 to work

StefanKarpinski StefanKarpinski · Accepted Answer · 2014-08-10T03:33:23

This does seem like more of a code review than a question (there aren't any question marks), but I'll take a crack at it anyway. The only obvious potential performance issue is that you're allocating arrays via comprehensions in evaluate, compute and backpropagation. That weighted sum computation in evaluate would be much more efficient as a for loop. For the other two methods, you may want to use pre-allocated arrays instead of comprehensions. You can use Julia's built-in profiler to see where your code is spending most of its time – that may reveal some non-obvious hot spots that you can further optimize.

Regarding the comparison to PyPy, it's quite possible that both Julia and PyPy are doing very well with this code – at or near C performance – in which case you wouldn't expect Julia to be much faster than PyPy, since they're both close to optimal. Comparing to the performance of a C implementation would be very informative since it would show how much performance is being left on the table by both Julia and PyPy. Fortunately, this code seems like it would be pretty straightforward to port to C.

Julia Neural Network code same speed as PyPy

1 Answers