14 #include <shogun/lib/external/libqp.h>
70 float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
72 float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
76 if ((a_j < a_lo) || (a_j > a_hi))
78 a_j = 0.5*(a_lo+a_hi);
83 if ((a_j > a_lo) || (a_j < a_hi))
85 a_j = 0.5*(a_lo+a_hi);
89 cur_solution.
add(cur_solution.
vector, 1.0,
90 initial_solution.
vector, a_j,
95 = 0.5*lambda*cur_solution.
dot(cur_solution.
vector,
103 (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
117 if (
CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
120 ls_res.
fval = cur_fval;
121 ls_res.
reg = cur_reg;
128 if (cur_lgrad*(a_hi - a_lo) >= 0)
147 ls_res.
fval = cur_fval;
148 ls_res.
reg = cur_reg;
192 std::vector<line_search_res> ret;
207 initial_step.
fval = cur_fval;
208 initial_step.
reg = cur_reg;
209 initial_step.
gradient = cur_subgrad;
211 ret.push_back(initial_step);
219 (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
221 (cur_fval >= prev_fval && iter > 0)
225 zoom(machine, lambda, prev_a, cur_a, initial_val,
226 initial_solution, search_dir, wolfe_c1, wolfe_c2,
227 initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
232 if (
CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
236 ls_res.
fval = cur_fval;
237 ls_res.
reg = cur_reg;
240 ret.push_back(ls_res);
247 zoom(machine, lambda, cur_a, prev_a, initial_val,
248 initial_solution, search_dir, wolfe_c1, wolfe_c2,
249 initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
254 if ((
CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
258 ls_res.
fval = cur_fval;
259 ls_res.
reg = cur_reg;
262 ret.push_back(ls_res);
267 prev_fval = cur_fval;
268 prev_lgrad = cur_lgrad;
270 cur_a = (cur_a + amax)*0.5;
286 for (uint32_t i=0; i < ncbm.
nCP; ++i)
323 libqp_state_T qp_exitflag={0, 0, 0, 0};
381 || icp_stats.
ACPs == NULL || icp_stats.
H_buff == NULL
408 best_risk = cur_risk;
413 bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
430 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
433 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
434 ncbm.
nIter, tstop-tstart, ncbm.
Fp, ncbm.
Fd, cur_risk);
454 ncbm.
Fd = -qp_exitflag.QP;
462 for (uint32_t i=0; i < ncbm.
nCP; ++i)
479 clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
489 for (uint32_t i=0; i < ncbm.
nCP; ++i)
496 bool calc_gap =
false;
502 for (uint32_t i=0; i < ncbm.
nCP; ++i)
505 cp_ptr = cp_ptr->
next;
506 scores[i] = cur_w.
dot(cur_w.
vector, a_1, w_dim);
514 SG_SPRINT(
"%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.
nIter, PO, ncbm.
Fd, QP_gap)
521 if ((best_Fp - ncbm.
Fd) <= TolAbs)
524 if (ncbm.
nCP >= maxCPs)
528 if (ncbm.
nCP+3 >= maxCPs)
535 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
537 (ncbm.
Fp-ncbm.
Fd)/ncbm.
Fp, cur_risk, ncbm.
nCP, ncbm.
nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.
Fd)/best_Fp);
542 std::vector<line_search_res> wbest_candidates;
552 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
560 wbest_candidates.push_back(ls);
571 uint32_t cp_min_approx = 0;
572 if (cp_min_approx || (ncbm.
nIter == 1))
584 std::vector<line_search_res> ls_res
587 if (ls_res[0].fval != ls_res[1].fval)
589 ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
596 - (ls_res[0].fval - ls_res[0].reg);
598 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
600 wbest_candidates.push_back(ls_res[0]);
603 ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
606 find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
609 = ls_res[1].solution.
dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
610 - (ls_res[1].fval - ls_res[1].reg);
612 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
614 wbest_candidates.push_back(ls_res[1]);
616 if ((best_Fp <= ls_res[1].fval) && (astart != 1))
626 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
635 wbest_candidates.push_back(ls);
638 astar = ls_res[1].a * norm_dir;
641 SG_SPRINT(
"\t\tline search time: %.5lf\n", tstop-tstart)
646 SG_SPRINT(
"\t searching for the best Fp:\n")
647 for (
size_t i = 0; i < wbest_candidates.size(); i++)
650 SG_SPRINT(
"\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
652 if (wbest_candidates[i].fval < best_Fp)
654 best_Fp = wbest_candidates[i].fval;
655 best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
656 memcpy(best_w, wbest_candidates[i].solution.
vector,
sizeof(
float64_t)*w_dim);
657 memcpy(best_subgrad.
vector, wbest_candidates[i].gradient.vector,
sizeof(
float64_t)*w_dim);
668 index_t cp_idx = ncbm.
nCP-(wbest_candidates.size()-i);
673 wbest_candidates[i].gradient.vector, w_dim)
674 + (-1.0*bias[cp_idx]);
675 if (score > best_risk)
680 wbest_candidates[i].gradient.vector, w_dim);
683 = best_Fp - wbest_candidates[i].reg
685 wbest_candidates[i].gradient.vector, w_dim);
688 SG_SPRINT(
"CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
692 SG_SPRINT(
"%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
697 wbest_candidates[i].gradient.
zero();
700 cp_ptr = CPList_tail;
701 for (
size_t j = wbest_candidates.size()-1; i < j; --j)
703 cp_ptr = cp_ptr->
prev;
711 cp_ptr = CPList_head;
712 for (uint32_t j = 0; j < ncbm.
nCP-1; ++j)
715 cp_ptr = cp_ptr->
next;
726 wbest_candidates[i].gradient.vector, w_dim);
730 = best_Fp - wbest_candidates[i].reg
732 wbest_candidates[i].gradient.vector, w_dim);
735 SG_SPRINT(
"solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)