
Source Code for Module vsql_core.utils.low_rank_helper

  1  """ 
  2  This is a helper module for the low rank code 
  3  """ 
  4   
  5  import math 
  6  import random 
# This is a helper module for the low-rank code.
# It should probably be factored into a common util;
# common utilities should be in their own directory (not in the model descriptions).

def dot(x, y):
    """
    Computes the dot product of the two given vectors.

    @type x: vector
    @param x: first vector
    @type y: vector
    @param y: second vector

    @rtype: double
    @return: the dot product of the two vectors
    """
    n = min(len(x), len(y))
    ret = 0.0
    for i in range(n):
        ret = ret + x[i] * y[i]
    return ret

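# Illustrative usage (not part of the original module): dot() truncates to the
# shorter of the two vectors, so mismatched lengths do not raise.
#
#   >>> dot([1.0, 2.0, 3.0], [4.0, 5.0, 6.0])
#   32.0
#   >>> dot([1.0, 2.0, 3.0], [4.0, 5.0])   # extra element is ignored
#   14.0
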
def scale_and_add(dst, src, s):
    """
    Adds the scaled source vector to the destination vector in place.

    @type dst: vector
    @param dst: destination vector, modified in place
    @type src: vector
    @param src: source vector that will be scaled
    @type s: double
    @param s: scale factor
    """
    n = min(len(dst), len(src))
    for i in range(n):
        dst[i] = dst[i] + src[i] * s

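# Illustrative usage (not part of the original module): scale_and_add() mutates
# dst in place, computing dst += s * src elementwise.
#
#   >>> v = [1.0, 1.0]
#   >>> scale_and_add(v, [2.0, 3.0], 0.5)
#   >>> v
#   [2.0, 2.5]
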
def ball_project(x, B, B2):
    """
    Rescales the given vector to norm B if its squared norm exceeds the given threshold.

    @type x: vector
    @param x: vector to be projected, modified in place
    @type B: double
    @param B: target norm
    @type B2: double
    @param B2: squared-norm threshold
    """
    norm_square = sum([xi * xi for xi in x])
    if norm_square > B2:
        norm = math.sqrt(norm_square)
        for i in range(len(x)):
            x[i] = x[i] * B / norm

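# Illustrative usage (not part of the original module): ball_project() only touches
# x when its squared norm exceeds B2 (normally B*B), otherwise it is a no-op.
#
#   >>> v = [3.0, 4.0]              # Euclidean norm 5
#   >>> ball_project(v, 1.0, 1.0)   # project onto the unit ball
#   >>> v                           # up to float rounding
#   [0.6, 0.8]
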
def make_random_vec(dim):
    """
    Creates a random vector with the given dimension.

    @type dim: number
    @param dim: dimension of the random vector

    @rtype: vector
    @return: random vector with the given dimension
    """
    return [random.gauss(0, 1) * 1e-2 for i in range(dim)]

def make_initial(max_rank, nRows, nCols, B):
    """
    Makes the initial low-rank factors L and R.

    @type max_rank: number
    @param max_rank: maximum rank (dimension of each factor vector)
    @type nRows: number
    @param nRows: number of rows
    @type nCols: number
    @param nCols: number of columns
    @type B: double
    @param B: ball radius used to project each random vector

    @rtype: tuple
    @return: the factors (L, R)
    """
    L = []
    R = []
    for i in range(nRows):
        random_vec = make_random_vec(max_rank)
        ball_project(random_vec, B, B * B)
        L.append(random_vec)

    for i in range(nCols):
        random_vec = make_random_vec(max_rank)
        ball_project(random_vec, B, B * B)
        R.append(random_vec)
    return (L, R)

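# Illustrative usage (not part of the original module): make_initial() builds one
# length-max_rank vector per row and per column, each drawn from a small Gaussian
# and projected onto the ball of radius B.
#
#   >>> L, R = make_initial(5, nRows=100, nCols=50, B=1.0)
#   >>> len(L), len(R), len(L[0])
#   (100, 50, 5)
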
def get_bucket(nsplits, nrows, row, row_map):
    """
    Gets the bucket index for a row.

    @type nsplits: number
    @param nsplits: number of splits
    @type nrows: number
    @param nrows: number of rows
    @type row: number
    @param row: row index
    @type row_map: vector
    @param row_map: row permutation map

    @rtype: number
    @return: bucket index in [0, nsplits - 1]
    """
    row_i = row_map[row]
    row_bucket_size = nrows // nsplits   # integer bucket size
    # clamp to the last bucket so rows beyond an even split stay in range
    return min(row_i // row_bucket_size, nsplits - 1)

def split_a_model(nSplits, nRows, L, row_perm):
    """
    Splits one model factor into nSplits buckets using the given row permutation.
    """
    splits = [[] for z in range(nSplits)]
    for i in range(len(L)):
        b = get_bucket(nSplits, nRows, i, row_perm)
        splits[b].append(L[i])
    return splits

def split_the_model(nSplits, nRows, nCols, L, R, row_perm, col_perm):
    """
    Splits both model factors into nSplits buckets each.
    """
    l_split = split_a_model(nSplits, nRows, L, row_perm)
    r_split = split_a_model(nSplits, nCols, R, col_perm)
    return [l_split, r_split]
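# Illustrative usage (not part of the original module): split_the_model() buckets the
# row factor L and the column factor R into nSplits pieces each using the supplied
# permutations, presumably so the pieces can be loaded into separate per-partition tables.
#
#   >>> L, R = make_initial(5, nRows=100, nCols=50, B=1.0)
#   >>> l_split, r_split = split_the_model(4, 100, 50, L, R, range(100), range(50))
#   >>> len(l_split), len(r_split), len(l_split[0])
#   (4, 4, 25)
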

####################
# Tablename helpers
####################
def tablename_prefix(mid, epoch_sign):
    return "victor_%d_%d" % (mid, epoch_sign)


def tablename_lmodel(mid, epoch_sign, part):
    return "%s_lmodel%d" % (tablename_prefix(mid, epoch_sign), part)


def tablename_rmodel(mid, epoch_sign, part):
    return "%s_rmodel%d" % (tablename_prefix(mid, epoch_sign), part)


def tablename_data(mid, epoch_sign, part):
    return "%s_data_%d" % (tablename_prefix(mid, epoch_sign), part)

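# Illustrative usage (not part of the original module): the tablename helpers build
# the per-model (mid), per-epoch, per-partition table names used on the SQL side.
#
#   >>> tablename_lmodel(7, 1, 3)
#   'victor_7_1_lmodel3'
#   >>> tablename_data(7, 1, 3)
#   'victor_7_1_data_3'
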
def get_l_chunk(nsplits, round, chunk):
    return (round - chunk) % nsplits


def get_r_chunk(nsplits, round, chunk):
    return chunk


def get_part(nsplits, round, chunk):
    l = get_l_chunk(nsplits, round, chunk)
    r = get_r_chunk(nsplits, round, chunk)
    return l * nsplits + r

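# Illustrative usage (not part of the original module): over nsplits rounds these
# helpers pair every L chunk with every R chunk exactly once (a diagonal-style
# schedule); get_part() flattens the (l, r) pair into a single partition index.
#
#   >>> [(get_l_chunk(3, 0, c), get_r_chunk(3, 0, c)) for c in range(3)]
#   [(0, 0), (2, 1), (1, 2)]
#   >>> [get_part(3, 0, c) for c in range(3)]
#   [0, 7, 5]
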
class ReadList:
    """A minimal read-only, file-like wrapper around a list of (id, vec) pairs."""

    def __init__(self, data):
        self.d = iter(data)

    def readline(self, size=None):
        try:
            (i, vec) = self.d.next()
        except StopIteration:
            return ''
        else:
            return "%d\t%s\n" % (i, vec)

    def read(self, size=None):
        return self.readline(size)

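# Illustrative usage (not part of the original module): ReadList exposes a list of
# (id, vec-string) pairs through a minimal file-like interface, enough for something
# like psycopg2's cursor.copy_from(), which just reads from a file-like object.
#
#   >>> rl = ReadList([(0, '{1.0,2.0}'), (1, '{3.0,4.0}')])
#   >>> rl.readline()
#   '0\t{1.0,2.0}\n'
#   >>> rl.readline()
#   '1\t{3.0,4.0}\n'
#   >>> rl.readline()
#   ''
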
def build_array_string(v):
    return "{%s}" % (','.join([str(x) for x in v]))

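# Illustrative usage (not part of the original module): build_array_string() renders a
# Python list in PostgreSQL array-literal form, matching the float8[] column used by
# the commented-out copy_list() below.
#
#   >>> build_array_string([1.0, 2.5, 3.0])
#   '{1.0,2.5,3.0}'
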
#def copy_list(dbname, tablename, l):
#    psql = psycopg2.connect(dbname)
#    psql.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
#    c = psql.cursor()
#    c.execute("DROP TABLE IF EXISTS %s; CREATE TABLE %s (id int, vec float8[]);" % (tablename, tablename))
#    z = ["%d\t%s" % (id,build_array_string(vec)) for (id,vec) in l]
#    f = StringIO.StringIO('\n'.join(z))
#    c.copy_from(f, tablename)