# Introduction

In the previous article we implemented topological sorting and created the node classes, so all the preparations are done. Now let's simulate the calculation process of a neural network. Along the way many classes will be created, and classes and methods will be rewritten repeatedly, so please be prepared.

In the previous code we implemented topological sorting, node class creation, and graph generation. In this section we use code to perform the forward and backward calculations.

import random
from functools import reduce
from collections import defaultdict

def topologic(graph):
    """Return the nodes of ``graph`` in a topological order.

    Args:
        graph: Mapping from each node to the list of nodes it feeds into
            (its outputs). The mapping is left unmodified.

    Returns:
        A list in which every key node appears before all of its outputs.
        Nodes without outgoing edges (they only occur inside the value
        lists) are placed at the end of the list.

    Raises:
        TypeError: If the graph contains a cycle.
    """
    # Work on a copy so the caller's graph survives the sort.
    remaining = {node: list(outputs) for node, outputs in graph.items()}

    # Sink nodes never occur as keys, so the loop below cannot reach them.
    # Collect all of them up front (first-seen order, no duplicates) rather
    # than only those attached to whichever node happens to be removed last.
    sinks = []
    seen_sinks = set()
    for outputs in remaining.values():
        for node in outputs:
            if node not in remaining and node not in seen_sinks:
                seen_sinks.add(node)
                sinks.append(node)

    sorted_node = []
    while remaining:
        nodes_with_inputs = {n for outputs in remaining.values() for n in outputs}
        ready = [n for n in remaining if n not in nodes_with_inputs]
        if not ready:
            raise TypeError('This graph has circle, which cannot get topological order')

        # Any ready node is a valid next element; the pick stays random.
        node = random.choice(ready)
        sorted_node.append(node)
        del remaining[node]

    return sorted_node + sinks

class Node:
    """A vertex of the computing graph."""

    def __init__(self, inputs=None, name=None):
        """Create the node and register it as an output of every input node.

        Args:
            inputs: List of nodes feeding into this node; ``None`` means none.
            name: Label used when the node is printed.
        """
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []  # filled in by downstream nodes when they are created
        self.name = name

        for node in self.inputs:
            node.outputs.append(self)

    def __repr__(self):
        """Show the node name when the node is printed."""
        return f'Node:{self.name}'

def convert_feed_dict_to_graph(feed_dict):
    """Build the graph reachable from the outer-edge nodes.

    Args:
        feed_dict: Iterable of starting nodes; each node exposes an
            ``outputs`` list.

    Returns:
        A ``defaultdict(list)`` mapping every visited node that has outputs
        to the list of those outputs.
    """
    need_expand = list(feed_dict)
    computing_graph = defaultdict(list)

    while need_expand:
        n = need_expand.pop(0)

        if n in computing_graph:
            continue  # already expanded

        for m in n.outputs:
            computing_graph[n].append(m)
            need_expand.append(m)

    return computing_graph

# Define nodes
node_x = Node(name='x')
node_k = Node(name='k')
node_b = Node(name='b')
node_y = Node(name='y')
node_linear = Node(inputs=[node_x, node_k, node_b], name='linear')
node_sigmoid = Node(inputs=[node_linear], name='sigmoid')
node_loss = Node(inputs=[node_y, node_sigmoid], name='loss')
# Outer-edge nodes: the ones that have to be fed a value
need_feed_value_nodes = [node_x, node_y, node_k, node_b]

# No feed_dict exists yet at this point, so the graph is expanded from the
# list of outer-edge nodes defined just above.
print(topologic(convert_feed_dict_to_graph(need_feed_value_nodes)))

>>>[Node:y, Node:x, Node:k, Node:b, Node:linear, Node:sigmoid, Node:loss]



Here $k$, $b$, $x$ and $y$ are the parameters and data that we supply as input, so these nodes are defined as placeholders. With the initial data in place, we evaluate the nodes in the topologically sorted order, which requires defining a forward calculation method.

class Node:
    """A vertex of the computing graph that can announce its forward step."""

    def __init__(self, inputs=None, name=None):
        """Create the node and register it as an output of every input node."""
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []
        self.name = name

        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Placeholder forward step: only reports that the node computes itself."""
        print(f"I am {self.name} ,i calculate myself value")

    def __repr__(self):
        return f'Node:{self.name}'

class Placeholder(Node):
    """Input node whose value is supplied from outside instead of computed."""

    def __init__(self, name=None):
        super().__init__(name=name)
        # forward() reads self.value; start with None so calling it before a
        # value has been fed prints None rather than raising AttributeError.
        self.value = None

    def forward(self):
        """Report the value this node has been given."""
        print(f"I am {self.name},my value is {self.value},i calculate myself value, I have been given value")

    def __repr__(self):
        return f'Placeholder:{self.name}'

#The function that generates the graph also needs to be modified
def convert_feed_dict_to_graph(feed_dict):
    """Build the graph reachable from the fed nodes and assign their values.

    Args:
        feed_dict: Dict mapping each Placeholder node to the value to feed it.

    Returns:
        A ``defaultdict(list)`` mapping every visited node that has outputs
        to the list of those outputs.
    """
    need_expand = list(feed_dict)
    computing_graph = defaultdict(list)

    while need_expand:
        n = need_expand.pop(0)
        if n in computing_graph:
            continue  # already expanded

        # Only placeholders receive a value from feed_dict.
        if isinstance(n, Placeholder):
            n.value = feed_dict[n]

        for m in n.outputs:
            computing_graph[n].append(m)
            need_expand.append(m)

    return computing_graph

# The instantiation of the node also needs to be changed
node_x = Placeholder(name='x')
node_k = Placeholder(name='k')
node_b = Placeholder(name='b')
node_y = Placeholder(name='y')
node_linear = Node(inputs=[node_x, node_k, node_b], name='linear')
node_sigmoid = Node(inputs=[node_linear], name='sigmoid')
node_loss = Node(inputs=[node_y, node_sigmoid], name='loss')

# Assign values to the corresponding placeholder nodes
feed_dict = {
    node_x: 3,
    node_y: random.random(),
    node_k: random.random(),
    node_b: 0.38,
}

# Sort topologically
sorted_node = topologic(convert_feed_dict_to_graph(feed_dict))
# Look at the output of the forward calculation
for node in sorted_node:
    node.forward()

>>>I am y ,i calculate myself value, I have been given value
I am x ,i calculate myself value, I have been given value
I am k ,i calculate myself value, I have been given value
I am b ,i calculate myself value, I have been given value
I am linear ,i calculate myself value by myself
I am sigmoid ,i calculate myself value by myself
I am loss ,i calculate myself value by myself


## Forward calculation

We can see that the nodes that are not Placeholders have to compute their own values, so we need to define three classes (Linear, Sigmoid and Loss), write the corresponding function for each of them, and add a value attribute to the Node class.
Following the topological order, the input data is passed from one level to the next until the loss value is obtained; this process is called forward calculation.

# We need numpy
import numpy as np

class Node:
    """A vertex of the computing graph that holds a computed value."""

    def __init__(self, inputs=None, name=None):
        """Create the node and register it as an output of every input node."""
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []
        self.name = name
        self.value = None  # set by forward() or by feeding a placeholder

        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Base forward step: only reports; subclasses compute ``value``."""
        print(f"I am {self.name},i calculate myself value by myself")

    def __repr__(self):
        return f'Node:{self.name}'

class Placeholder(Node):
    """Input node whose value is supplied from outside instead of computed."""

    def __init__(self, name=None):
        super().__init__(name=name)

    def forward(self):
        """Report the value this node has been given."""
        print(f"I am {self.name},my value is {self.value},i calculate myself value, I have been given value")

    def __repr__(self):
        return f'Placeholder:{self.name}'

class Linear(Node):
    """Node computing ``k * x + b`` from its three input nodes."""

    def __init__(self, x, k, b, name=None):
        super().__init__(inputs=[x, k, b], name=name)

    def forward(self):
        """Compute the value, then report it."""
        x, k, b = self.inputs
        self.value = k.value * x.value + b.value
        # Report after computing; printing first would show the stale value
        # (None on the first pass).
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def __repr__(self):
        return f'Linear:{self.name}'

class Sigmoid(Node):
    """Node applying the logistic sigmoid to the value of its single input."""

    def __init__(self, x, name=None):
        super().__init__(inputs=[x], name=name)

    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def forward(self):
        """Compute the value, then report it."""
        x = self.inputs[0]
        self.value = self._sigmoid(x.value)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def __repr__(self):
        return f'Sigmoid:{self.name}'

class Loss(Node):
    """Mean squared error between the values of ``y`` and ``yhat``."""

    def __init__(self, y, yhat, name=None):
        super().__init__(inputs=[y, yhat], name=name)

    def forward(self):
        """Compute the MSE, then report it."""
        y, yhat = self.inputs
        self.value = np.mean((y.value - yhat.value) ** 2)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def __repr__(self):
        # The prefix names this class (it was copied over as 'Sigmoid:').
        return f'Loss:{self.name}'

#Redefine Node
node_x = Placeholder(name='x')
node_k = Placeholder(name='k')
node_b = Placeholder(name='b')
node_y = Placeholder(name='y')
node_linear = Linear(node_x, node_k, node_b, name='linear')
node_sigmoid = Sigmoid(x=node_linear, name='sigmoid')
node_loss = Loss(y=node_y, yhat=node_sigmoid, name='loss')

# Check the results as well
feed_dict = {
    node_x: 3,
    node_y: random.random(),
    node_k: random.random(),
    node_b: 0.38,
}

sorted_node = topologic(convert_feed_dict_to_graph(feed_dict))
for node in sorted_node:
    node.forward()

>>>I am k,my value is 0.06687895212397421,i calculate myself value, I have been given value
I am b,my value is 0.38,i calculate myself value, I have been given value
I am y,my value is 0.2993957062307556,i calculate myself value, I have been given value
I am x,my value is 3,i calculate myself value, I have been given value
I am linear ,my value is 2.650973615995455,i calculate myself value by my self!
I am sigmoid ,my value is 0.9340709734538888,i calculate myself value by my self!
I am loss ,my value is 0.01786359719043438,i calculate myself value by my self!


As you can see from the results, we have calculated the value of each node by forward calculation.

## Backward calculation

Forward calculation starts from the input values and proceeds one level at a time until the loss value is obtained. We then need feedback from the loss value: we compute the derivatives and use gradient descent to re-optimize the parameters. Forward and backward calculation together give us the final model. So next we try to write the code for backward calculation.

#So let's start by adding a backward calculation to see how to do this
class Node:
    """A vertex of the computing graph with forward and backward steps."""

    def __init__(self, inputs=None, name=None):
        """Create the node and register it as an output of every input node."""
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []
        self.name = name
        self.value = None

        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Base forward step: only reports; subclasses compute ``value``."""
        print(f"I am {self.name},i calculate myself value by myself")

    def backward(self):
        """Announce which partial derivative is needed for each input."""
        for n in self.inputs:
            print('get ∂{}/∂{}'.format(self.name, n.name))

    def __repr__(self):
        return f'Node:{self.name}'

...
...

for node in sorted_node[::-1]:  # <<<
    print(f'I am {node.name}')
    node.backward()

>>>I am loss
get ∂loss/∂y
get ∂loss/∂sigmoid
I am sigmoid
get ∂sigmoid/∂linear
I am y
I am linear
get ∂linear/∂x
get ∂linear/∂k
get ∂linear/∂b
I am k
I am b
I am x


I have omitted the unchanged code above; only the additions need to be applied on top of it. The final output shows that we can obtain the derivatives in the order we had in mind, so the next step is to write a backward calculation method for the other classes.
One thing to note is the line marked <<< in the code: it walks through the sorted result in reverse. That is deliberate: if we change [::-1] so that the nodes are visited in forward order instead, the code will raise an error. This is consistent with the chain rule. For example, if we have not first obtained $\frac{\partial Loss}{\partial Sigmoid}$, we cannot continue the chain with $\frac{\partial Sigmoid}{\partial Linear}$. You can also experiment in your own environment.

The next step is to write a backward calculation method for each node class, and to add a gradients attribute to the Node class that stores each partial derivative, as we did in our second article.

class Node:
    """A vertex of the computing graph that stores value and gradients."""

    def __init__(self, inputs=None, name=None):
        """Create the node and register it as an output of every input node."""
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []
        self.name = name
        self.value = None
        self.gradients = dict()  # partial derivative of the loss, keyed by node

        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Base forward step: only reports; subclasses compute ``value``."""
        print(f"I am {self.name},i calculate myself value by myself")

    def __repr__(self):
        return f'Node:{self.name}'

class Placeholder(Node):
    """Input node whose value is supplied from outside instead of computed."""

    def __init__(self, name=None):
        super().__init__(name=name)

    def forward(self):
        """Report the value this node has been given."""
        print(f"I am {self.name},my value is {self.value},i calculate myself value, I have been given value")

    def backward(self):
        """Report the gradient expression the downstream node stored for us."""
        # NOTE(review): the body was missing here; this is reconstructed from
        # the "I got myself gradients: ..." lines of the sample output.
        print('I got myself gradients: {}'.format(self.outputs[0].gradients[self]))

    def __repr__(self):
        return f'Placeholder:{self.name}'

class Linear(Node):
    """Node computing ``k * x + b``; backward builds symbolic chain-rule text."""

    def __init__(self, x, k, b, name=None):
        super().__init__(inputs=[x, k, b], name=name)

    def forward(self):
        """Compute the value, then report it."""
        x, k, b = self.inputs
        self.value = k.value * x.value + b.value
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store and print, per input, the upstream expression times the local one."""
        # NOTE(review): the left part of these statements was truncated; the
        # form is reconstructed from the sample output
        # ("self.gradients[self.inputs[0]] get ∂loss/∂sigmoid*...*∂linear / ∂x").
        upstream = self.outputs[0].gradients[self]
        for i, n in enumerate(self.inputs):
            self.gradients[n] = '*'.join([upstream, '∂{} / ∂{}'.format(self.name, n.name)])
            print('self.gradients[self.inputs[{}]] {}'.format(i, self.gradients[n]))

    def __repr__(self):
        return f'Linear:{self.name}'

class Sigmoid(Node):
    """Sigmoid node; backward builds symbolic chain-rule text."""

    def __init__(self, x, name=None):
        super().__init__(inputs=[x], name=name)

    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def forward(self):
        """Compute the value, then report it."""
        x = self.inputs[0]
        self.value = self._sigmoid(x.value)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store the upstream expression joined with this node's local one."""
        # NOTE(review): the left part of this statement was truncated; the
        # form is reconstructed from the sample output
        # ("get ∂loss/∂sigmoid*get ∂sigmoid/∂linear").
        self.gradients[self.inputs[0]] = '*'.join([
            self.outputs[0].gradients[self],
            'get ∂{}/∂{}'.format(self.name, self.inputs[0].name),
        ])

    def __repr__(self):
        return f'Sigmoid:{self.name}'

class Loss(Node):
    """Mean squared error node; backward builds symbolic derivative text."""

    def __init__(self, y, yhat, name=None):
        super().__init__(inputs=[y, yhat], name=name)

    def forward(self):
        """Compute the MSE, then report it."""
        y, yhat = self.inputs
        self.value = np.mean((y.value - yhat.value) ** 2)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store the symbolic derivative of the loss for each input."""
        # NOTE(review): the body was missing here; this is reconstructed from
        # the sample output ("get ∂loss/∂y", "get ∂loss/∂sigmoid").
        for n in self.inputs:
            self.gradients[n] = 'get ∂{}/∂{}'.format(self.name, n.name)

    def __repr__(self):
        # The prefix names this class (it was copied over as 'Sigmoid:').
        return f'Loss:{self.name}'

...
...
for node in sorted_node[::-1]:
    print(f'I am {node.name}')
    node.backward()

>>> I am loss
I am sigmoid
I am linear
self.gradients[self.inputs[0]] get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂x
self.gradients[self.inputs[1]] get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂k
self.gradients[self.inputs[2]] get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂b
I am b
I got myself gradients: get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂b
I am k
I got myself gradients: get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂k
I am x
I got myself gradients: get ∂loss/∂sigmoid*get ∂sigmoid/∂linear*∂linear / ∂x
I am y
I got myself gradients: get ∂loss/∂y


This gives the partial derivatives of each parameter.
In fact, the derivation of parameters is not as complex as we expected.
For example, the partial derivative with respect to $k$ that we need is

$$Loss'(sigmoid(linear(k)))$$

It can actually be split into these parts:

$$x = sigmoid(linear(k)), \quad Loss(x)$$

$$\Rightarrow \frac{\partial Loss}{\partial x} \Rightarrow \frac{\partial Loss}{\partial Sigmoid} \cdot Sigmoid'(Linear(k))$$

$$\Rightarrow \frac{\partial Loss}{\partial Sigmoid} \cdot \frac{\partial Sigmoid}{\partial Linear} \cdot Linear'(k)$$

$$\Rightarrow \frac{\partial Loss}{\partial Sigmoid} \cdot \frac{\partial Sigmoid}{\partial Linear} \cdot \frac{\partial Linear}{\partial k}$$
In fact, this is just the chain rule, and it is even easier to implement in a program: we do not need to define an intermediate function such as the $x$ above. Each node stores the value of its own derivative separately, and the stored values are then multiplied together along the chain.

While writing the code, keep in mind that if a node is one of the inputs of the next node, then that next node is one of its outputs; in other words, some nodes' inputs are other nodes' outputs. This is only a small detail of the implementation.

Now that we have implemented backpropagation symbolically, the next step is to rewrite the classes and bring in the numerical calculations.

class Linear(Node):
    """Node computing ``k * x + b`` with numeric gradients."""

    def __init__(self, x, k, b, name=None):
        super().__init__(inputs=[x, k, b], name=name)

    def forward(self):
        """Compute the value, then report it."""
        x, k, b = self.inputs
        self.value = k.value * x.value + b.value
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store the loss gradient for each input (chain rule).

        The local derivatives of ``k * x + b`` are ``k`` w.r.t. x, ``x``
        w.r.t. k and ``1`` w.r.t. b; each is scaled by the gradient the
        downstream node stored for this node.
        """
        # NOTE(review): the gradient statements were missing here; they are
        # derived from forward() and keyed by input node like Loss.backward.
        x, k, b = self.inputs
        upstream = self.outputs[0].gradients[self]
        self.gradients[x] = upstream * k.value
        self.gradients[k] = upstream * x.value
        self.gradients[b] = upstream * 1

    def __repr__(self):
        return f'Linear:{self.name}'

class Sigmoid(Node):
    """Sigmoid node with a numeric gradient."""

    def __init__(self, x, name=None):
        super().__init__(inputs=[x], name=name)

    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def forward(self):
        """Compute the value, then report it."""
        x = self.inputs[0]
        self.value = self._sigmoid(x.value)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store the loss gradient for the input: upstream * s * (1 - s)."""
        # NOTE(review): the gradient statement was missing here; it uses the
        # sigmoid derivative s * (1 - s) and the keying of Loss.backward.
        x = self.inputs[0]
        s = self._sigmoid(x.value)
        self.gradients[x] = self.outputs[0].gradients[self] * s * (1 - s)

    def __repr__(self):
        return f'Sigmoid:{self.name}'

class Loss(Node):
    """Mean squared error between ``y`` and ``yhat`` with numeric gradients."""

    def __init__(self, y, yhat, name=None):
        super().__init__(inputs=[y, yhat], name=name)

    def forward(self):
        """Compute the MSE, then report it."""
        y, yhat = self.inputs
        self.value = np.mean((y.value - yhat.value) ** 2)
        # Report after computing; printing first would show the stale value.
        print(f"I am {self.name} ,my value is {self.value},i calculate myself value by my self!")

    def backward(self):
        """Store the derivative of the MSE with respect to ``y`` and ``yhat``."""
        y, yhat = self.inputs
        diff = np.mean(y.value - yhat.value)
        self.gradients[y] = 2 * diff
        self.gradients[yhat] = -2 * diff

    def __repr__(self):
        # The prefix names this class (it was copied over as 'Sigmoid:').
        return f'Loss:{self.name}'

...
...

for node in sorted_node[::-1]:
    print('\n I am {}'.format(node.name))
    node.backward()

>>>
I am loss

I am sigmoid

I am linear

I am k

I am x

I am y

I am b


That's the code for the backpropagation implementation. In fact, we have now basically built a deep learning framework. The next step is training the model. As I said before, training means obtaining the loss value by forward calculation and then backpropagating from the loss value, so that the parameters are continuously optimized until we get the model.
So here is how to write the training step, which should look familiar.

However, it is important to note that during training we only train the coefficients $k, b$; $x, y$ do not need to be updated. So in the code we add a parameter to the Node class that marks whether a node is trainable.

class Node:
    """A vertex of the computing graph that can be marked as trainable."""

    def __init__(self, inputs=None, name=None, is_trainable=False):
        """Create the node and register it as an output of every input node.

        Args:
            inputs: List of nodes feeding into this node; ``None`` means none.
            name: Label of the node.
            is_trainable: Whether the update step should change ``value``.
        """
        # A literal [] default would be one list shared by every instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []
        self.name = name
        self.value = None
        self.gradients = dict()  # partial derivative of the loss, keyed by node
        self.is_trainable = is_trainable

        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Base forward step: only reports; subclasses compute ``value``."""
        print(f"I am {self.name},i calculate myself value by myself")

# NOTE: these calls pass is_trainable, so they only work once a Placeholder
# class whose __init__ accepts is_trainable has been defined; run them after
# that class definition.
node_x = Placeholder(name = 'x')
node_k = Placeholder(name = 'k',is_trainable = True)
node_b = Placeholder(name = 'b',is_trainable = True)
node_y = Placeholder(name = 'y')
node_linear = Linear(node_x, node_k, node_b,name = 'linear')
node_sigmoid = Sigmoid(x = node_linear,name = 'sigmoid')
node_loss = Loss(y = node_y, yhat = node_sigmoid,name = 'loss')

class Placeholder(Node):
    """Input node whose value is fed from outside and may be trainable."""

    def __init__(self, name=None, is_trainable=False):
        # is_trainable has to be a parameter: it is forwarded to Node and
        # would otherwise be an undefined name here.
        super().__init__(name=name, is_trainable=is_trainable)

    def forward(self):
        """Report the value this node has been given."""
        print(f"I am {self.name},my value is {self.value},i calculate myself value, I have been given value")

    def backward(self):
        """Copy the gradient the downstream node stored for this node."""
        # NOTE(review): the body was missing here; the update step reads
        # node.gradients[node], so the gradient is stored under self.
        self.gradients[self] = self.outputs[0].gradients[self]

    def __repr__(self):
        return f'Placeholder:{self.name}'

class Placeholder(Node):
    """Input node whose value is fed from outside and may be trainable."""

    def __init__(self, name=None, is_trainable=False):
        super().__init__(name=name, is_trainable=is_trainable)

    def forward(self):
        """Do nothing: the value is fed in, and the earlier printout is disabled."""
        pass

    def backward(self):
        """Copy the gradient the downstream node stored for this node."""
        # NOTE(review): the body was missing here; the update step reads
        # node.gradients[node], so the gradient is stored under self.
        self.gradients[self] = self.outputs[0].gradients[self]

    def __repr__(self):
        return f'Placeholder:{self.name}'

learning_rate = 1e-1
for node in sorted_node:
    if node.is_trainable:
        # Gradient descent: step against the gradient.
        node.value = node.value + -1 * node.gradients[node] * learning_rate
        cmp = 'large' if node.gradients[node] > 0 else 'small'
        print("{}'value is too {}, I need update myself to {}".format(node.name, cmp, node.value))

>>> b'value is too small, I need update myself to 0.3861182465707451
k'value is too small, I need update myself to 0.4286512588727166


So we've finished a training session

# Final encapsulation

Finally, we'll encapsulate the code that we trained after and the code that we calculated forward and backward.

def forward(graph_sorted_nodes):
    """Run ``forward()`` on every node in order and print the loss value.

    Args:
        graph_sorted_nodes: Nodes in topological order.
    """
    for node in graph_sorted_nodes:
        node.forward()
        if isinstance(node, Loss):
            print('Loss value: {}'.format(node.value))

def backward(graph_sorted_nodes):
    """Run ``backward()`` on every node, last node first.

    Args:
        graph_sorted_nodes: Nodes in topological order; they are visited in
            reverse so each node runs after the nodes it feeds into.
    """
    for node in graph_sorted_nodes[::-1]:
        node.backward()

def run_one_epoch(graph_sorted_nodes):
    """Run one forward pass followed by one backward pass over the nodes."""
    forward(graph_sorted_nodes)
    backward(graph_sorted_nodes)

def optimize(graph_nodes, learning_rate=1e-1):
    """Apply one gradient-descent update to every trainable node.

    Args:
        graph_nodes: Nodes to inspect; only those with ``is_trainable`` set
            are changed.
        learning_rate: Step size multiplied with the gradient.
    """
    for node in graph_nodes:
        if node.is_trainable:
            # Each trainable node keeps its own gradient under gradients[node].
            node.value = node.value + -1 * node.gradients[node] * learning_rate
            cmp = 'large' if node.gradients[node] > 0 else 'small'
            print("{}'value is too {}, I need update myself to {}".format(node.name, cmp, node.value))

# One training step: forward and backward pass, then the parameter update.
run_one_epoch(sorted_node)
optimize(sorted_node)

>>>Loss value: 1.3578421759877876e-05
b'value is too small, I need update myself to 0.38741156105437113
k'value is too small, I need update myself to 0.4325312023235947


So we have done one complete training step. Next we add a piece of code that repeats the training many times, and we observe and test the results.

# View the gradient descent process

loss_history = []

for i in range(100):
    run_one_epoch(sorted_node)
    # The loss node is expected to be the last element of the sorted list.
    __loss_node = sorted_node[-1]
    assert isinstance(__loss_node, Loss)

    loss_history.append(__loss_node.value)
    optimize(sorted_node, learning_rate=1e-1)


This is the result of my operation

And finally my test results

# summary

At this point, we have covered the core of deep learning. First we understood gradient descent, then we obtained the order of the nodes by topological sorting, then we continuously optimized the parameters by propagating forward and backward, and finally we got the model we need.
End!!
Sprinkle~~
★,°:.☆(￣▽￣)/\$:.°★ .

I will rearrange and optimize the relevant code to send the source code separately (_)!

Posted on Thu, 09 Sep 2021 17:56:44 -0400 by arbelo