1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
def do_adam():
    """Run full-batch Adam on the parameters ``w`` and ``b`` and return them.

    Starts from the module-level ``init_w`` / ``init_b`` and performs
    ``max_epochs`` update steps. In each step the gradients returned by
    ``grad_w`` and ``grad_b`` are summed over every ``(x, y)`` pair of the
    module-level ``X`` and ``Y`` before a single Adam update is applied.

    Hyper-parameters are fixed inside the function: learning rate
    ``eta = 0.1``, ``beta1 = 0.9``, ``beta2 = 0.999``, ``eps = 1e-8``.

    Returns:
        A ``(w, b)`` tuple holding the parameters after the last step
        (the initial values when ``max_epochs`` is 0).
    """
    w, b, eta = init_w, init_b, 0.1
    beta1, beta2, eps = 0.9, 0.999, 1e-8

    # Exponential moving averages of the gradient (m_*) and of the squared
    # gradient (v_*), both starting at zero.
    m_w, m_b, v_w, v_b = 0, 0, 0, 0

    for i in range(max_epochs):
        # Accumulate the gradient over the whole data set (one update per epoch).
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)

        m_w = beta1 * m_w + (1 - beta1) * dw
        m_b = beta1 * m_b + (1 - beta1) * db
        v_w = beta2 * v_w + (1 - beta2) * dw ** 2
        v_b = beta2 * v_b + (1 - beta2) * db ** 2

        # Bias correction: the averages start at zero, so early estimates are
        # rescaled by 1 - beta^t, with t = i + 1 (1-based step count).
        m_correction = 1 - math.pow(beta1, i + 1)
        v_correction = 1 - math.pow(beta2, i + 1)
        m_w_hat = m_w / m_correction
        m_b_hat = m_b / m_correction
        v_w_hat = v_w / v_correction
        v_b_hat = v_b / v_correction

        # NOTE(review): eps is added inside the square root here, whereas the
        # Adam paper adds it outside (sqrt(v_hat) + eps). Kept as written so
        # the numeric trajectory is unchanged - confirm this is intended.
        w -= (eta / np.sqrt(v_w_hat + eps)) * m_w_hat
        b -= (eta / np.sqrt(v_b_hat + eps)) * m_b_hat

    # Hand the fitted parameters back; without this the result of the
    # optimisation is lost when w and b are plain numbers.
    return w, b
|