00001
00012 template <class T>
00013 Hazy_Sgd<T>::Hazy_Sgd(Storage_Manager<T> *sm, IncrementalSGD<T> *sgd, Skiing &_ski, hazy_model::strategy s)
00014 : ski(_ski), hm(sgd->getModel(),s) {
00015
00016
00017 pthread_rwlock_init(&_rwlock, NULL);
00018 this->sgd = sgd;
00019 _high_low_water_valid = true;
00020 _last_sort_model = sgd->getModel();
00021 _dim = _last_sort_model.dim;
00022 st_man = sm;
00023 if(hazy_model::isHazy(s))
00024 st_man->resort(hm);
00025 }
00026
00030 template<class T>
00031 void
00032 Hazy_Sgd<T>::updateModel() {
00033
00034 pthread_rwlock_wrlock(&_rwlock);
00035
00036 hm._model = sgd->getModel();
00037
00038
00039 if( !hm.isEager() ) {
00040
00041 if(hm._strategy == hazy_model::LAZY_HAZY) { _high_low_water_valid = false; }
00042
00043 pthread_rwlock_unlock(&_rwlock);
00044 return;
00045 }
00046
00047 if( hm._strategy == hazy_model::EAGER_NAIVE) {
00048 assert(hm._strategy == hazy_model::EAGER_NAIVE);
00049 double _notused = 0.0;
00050 st_man->incrementalUpdate(hm, _notused);
00051
00052 pthread_rwlock_unlock(&_rwlock);
00053 return;
00054 }
00055
00056 assert(hm._strategy == hazy_model::EAGER_HAZY);
00057
00058
00059 update_low_high_water();
00060
00061 if(ski.shouldResort()) {
00062 Timer t(true);
00063 st_man->resort(hm);
00064 _last_sort_model = model(hm._model);
00065
00066 ski.doResort(t.stop());
00067 hm.low_water = 0.0, hm.high_water = 0.0;
00068 } else {
00069 LOGGING_ONLY(std::cout << "storage manager update with eager hazy" << std::endl;);
00070 double waste_time = 0.0;
00071 st_man->incrementalUpdate(hm, waste_time);
00072 ski.updateAccCost(waste_time);
00073 LOGGING_ONLY(std::cout << "acc cost become: " << ski.getAccCost() << std::endl;);
00074 }
00075
00076
00077 pthread_rwlock_unlock(&_rwlock);
00078 }
00079
00089 template <class T>
00090 void
00091 Hazy_Sgd<T>::updateModel(T featureArray, int classOfExample) {
00092 LOGGING_ONLY(std::cout << "update model called" << std::endl;);
00093
00094 LOGGING_ONLY(std::cout << "[hazy_sgd] ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00095
00096
00097 pthread_rwlock_wrlock(&_rwlock);
00098
00099 bool model_changed = sgd->addExample(classOfExample, featureArray);
00100
00101 if(model_changed) {
00102
00103 LOGGING_ONLY(std::cout << "model changed" << std::endl;);
00104 hm.invalidate_db_model();
00105 } else {
00106 LOGGING_ONLY(std::cout << "model didn't change" << std::endl;);
00107
00108 pthread_rwlock_unlock(&_rwlock);
00109 return;
00110 }
00111
00112 pthread_rwlock_unlock(&_rwlock);
00113
00114 updateModel();
00115 LOGGING_ONLY(std::cout << "[hazy_sgd] {after update} ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00116 }
00117
00118 template<class T>
00119 struct _id_label_vector_entry {
00120 int id;
00121 T x;
00122 int label;
00123 static int parse_tuple(PGresult *res, int index, _id_label_vector_entry &c) {
00124 c.id = atoi(PQgetvalue(res, index, 0));
00125 c.label = atoi(PQgetvalue(res, index, 1));
00126 std::string f_vec = PQgetvalue(res, index, 2);
00127 int r = c.x.fromPSQL(f_vec);
00128 return r;
00129 }
00130 static bool _entity_compare(const _id_label_vector_entry &x, const _id_label_vector_entry &y) {
00131 return x.eps < y.eps;
00132 }
00133 };
00134
00135 template <class T>
00136 void
00137 Hazy_Sgd<T>::deleteModel(std::string db_name, std::string table_name) {
00138 _connection_map::iterator i = connections.find(db_name);
00139 Hazy_Database *db_conn;
00140
00141 if(i == connections.end()) {
00142 db_conn = new Hazy_Database(db_name);
00143 connections[db_name] = db_conn;
00144 }
00145 else
00146 db_conn = i->second;
00147
00148 std::vector< _id_label_vector_entry<T> > _tuples;
00149 LOGGING_ONLY(Timer retrieve_parse_timer(true););
00150 std::string retrieve_query = "SELECT id, label, feature_vector FROM " + table_name + ";";
00151 db_conn->postgresTupleParser<_id_label_vector_entry <T> >(retrieve_query.c_str(), _id_label_vector_entry<T>::parse_tuple,
00152 _tuples);
00153 LOGGING_ONLY(std::cout << "[deleteModel] time elapsed for retrieve & parse from training table: " << retrieve_parse_timer.stop() << std::endl;);
00154 LOGGING_ONLY(std::cout << "[deleteModel] entity size: " << _tuples.size() << std::endl;);
00155
00156 LOGGING_ONLY(std::cout << "delete model called" << std::endl;);
00157
00158 LOGGING_ONLY(std::cout << "[hazy_sgd] ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00159
00160
00161 pthread_rwlock_wrlock(&_rwlock);
00162
00163 bool model_changed = false;
00164
00165
00166 LOGGING_ONLY(Timer reset_model_timer(true););
00167 sgd->resetModel();
00168 LOGGING_ONLY(std::cout << "[deleteModel] reset model timer: " << reset_model_timer.stop() << std::endl;);
00169
00170 LOGGING_ONLY(Timer learn_timer(true););
00171 for(unsigned int i = 0; i < _tuples.size(); i ++) {
00172 int classOfExample = _tuples[i].label;
00173 T featureArray = _tuples[i].x;
00174 model_changed = sgd->addExample(classOfExample, featureArray);
00175 }
00176 LOGGING_ONLY(std::cout << "[deleteModel] time for retraining: " << learn_timer.stop() << std::endl;);
00177
00178 if(model_changed) {
00179
00180 LOGGING_ONLY(std::cout << "model changed" << std::endl;);
00181 hm.invalidate_db_model();
00182 } else {
00183 LOGGING_ONLY(std::cout << "model didn't change" << std::endl;);
00184
00185 pthread_rwlock_unlock(&_rwlock);
00186 return;
00187 }
00188
00189 pthread_rwlock_unlock(&_rwlock);
00190
00191 updateModel();
00192 LOGGING_ONLY(std::cout << "[hazy_sgd] {after update} ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00193 }
00194
00200 template <class T>
00201 void
00202 Hazy_Sgd<T>::readEntityClass(key entity_id, sClass &c) {
00203 pthread_rwlock_rdlock(&_rwlock);
00204 if(hm._strategy == hazy_model::LAZY_HAZY && !_high_low_water_valid) { update_low_high_water(); _high_low_water_valid = true; }
00205 st_man->getEntityClass(entity_id, c, hm);
00206 pthread_rwlock_unlock(&_rwlock);
00207 }
00208
00214 template <class T>
00215 void
00216 Hazy_Sgd<T>::readEntityClass(T vec, sClass &c) {
00217
00218 assert(hm.isLazy());
00219
00220 pthread_rwlock_rdlock(&_rwlock);
00221 c = sgd->classifyExample(vec) ? 1 : 0;
00222
00223 pthread_rwlock_unlock(&_rwlock);
00224 }
00225
00232 template <class T>
00233 void
00234 Hazy_Sgd<T>::readEntityClass(T vec, double eps, sClass &c) {
00235
00236
00237 assert(hm._strategy == hazy_model::LAZY_HAZY);
00238 if(!_high_low_water_valid) { update_low_high_water(); _high_low_water_valid = true; }
00239 if(eps > hm.high_water) { c = 1; return; }
00240 if(eps < hm.low_water) { c = 0; return; }
00241
00242 readEntityClass(vec, c);
00243 }
00244
00250 template <class T>
00251 void
00252 Hazy_Sgd<T>::readNumInClass(sClass c, int &nClass) {
00253 double waste_time = 0;
00254
00255 pthread_rwlock_rdlock(&_rwlock);
00256 if(!_high_low_water_valid) { update_low_high_water(); _high_low_water_valid = true; }
00257
00258 st_man->getNumInClass(c, nClass, hm, waste_time);
00259
00260 if(hm._strategy == hazy_model::LAZY_HAZY) {
00261 ski.updateAccCost(waste_time);
00262 }
00263
00264 pthread_rwlock_unlock(&_rwlock);
00265 }
00266
00272 template <class T>
00273 void
00274 Hazy_Sgd<T>::holder_difference_models(double &delta_w, double &delta_b) {
00275 struct model current = sgd->getModel();
00276 delta_b = current.bias - _last_sort_model.bias;
00277 delta_w = 0.0;
00278 for(int d = 1; d < _dim; d ++) {
00279 delta_w = std::max(delta_w, fabs((current.w.get(d) * current.wscale - _last_sort_model.w.get(d)*_last_sort_model.wscale)));
00280 }
00281
00282
00283 delta_w *= ski.getMFactor();
00284 }