I am 热心手机, a blogger at 靠谱客. This article is a source-code walkthrough of "Deep Reinforcement Learning for Online Computation Offloading" (DROO), shared here in the hope that it makes a useful reference.
main.py
# #################################################################
#  Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks
#
#  This file contains the main code of DROO. It loads the training samples saved in ./data/data_#.mat,
#  splits the samples into two parts (training and testing data constitute 80% and 20%, respectively),
#  trains the DNN with training and validation samples, and finally tests the DNN with test data.
#
#  Input: ./data/data_#.mat
#    Data samples are generated according to the CD method presented in [2]. There are 30,000 samples
#    saved in each ./data/data_#.mat, where # is the user number. Each data sample includes:
#  -----------------------------------------------------------------
#  |  wireless channel gain             |  input_h      |
#  -----------------------------------------------------------------
#  |  computing mode selection          |  output_mode  |
#  -----------------------------------------------------------------
#  |  energy broadcasting parameter     |  output_a     |
#  -----------------------------------------------------------------
#  |  transmit time of wireless device  |  output_tau   |
#  -----------------------------------------------------------------
#  |  weighted sum computation rate     |  output_obj   |
#  -----------------------------------------------------------------
#
#  References:
#  [1] Liang Huang, Suzhi Bi, and Ying-Jun Angela Zhang, "Deep Reinforcement Learning for Online Computation Offloading in Wireless Powered Mobile-Edge Computing Networks," IEEE Transactions on Mobile Computing, early access, 2019, DOI:10.1109/TMC.2019.2928811.
#  [2] S. Bi and Y. J. Zhang, "Computation rate maximization for wireless powered mobile-edge computing with binary computation offloading," IEEE Trans. Wireless Commun., vol. 17, no. 6, pp. 4177-4190, Jun. 2018.
#
#  version 1.0 -- July 2018. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################
import scipy.io as sio                     # import scipy.io for .mat file I/O
import numpy as np                         # import numpy

# for tensorflow2
from memoryTF2 import MemoryDNN
from optimization import bisection

import time


def plot_rate(rate_his, rolling_intv=50):
    import matplotlib.pyplot as plt
    import pandas as pd
    import matplotlib as mpl

    rate_array = np.asarray(rate_his)
    df = pd.DataFrame(rate_his)

    mpl.style.use('seaborn')
    fig, ax = plt.subplots(figsize=(15, 8))

    plt.plot(np.arange(len(rate_array)) + 1, df.rolling(rolling_intv, min_periods=1).mean(), 'b')
    plt.fill_between(np.arange(len(rate_array)) + 1,
                     df.rolling(rolling_intv, min_periods=1).min()[0],
                     df.rolling(rolling_intv, min_periods=1).max()[0],
                     color='b', alpha=0.2)
    plt.ylabel('Normalized Computation Rate')
    plt.xlabel('Time Frames')
    plt.show()


def save_to_txt(rate_his, file_path):
    with open(file_path, 'w') as f:
        for rate in rate_his:
            f.write("%s \n" % rate)


if __name__ == "__main__":
    '''
    This algorithm generates K candidate offloading modes from the DNN and chooses
    the one with the largest reward. That mode is stored in the memory, which is
    further used to train the DNN.
    Adaptive K is implemented: K = max(K, K_his[-memory_size])
    '''

    N = 10                       # number of users
    n = 30000                    # number of time frames
    K = N                        # initialize K = N
    decoder_mode = 'OP'          # the quantization mode: 'OP' (order-preserving) or 'KNN'
    Memory = 1024                # capacity of the memory structure
    Delta = 32                   # update interval for adaptive K

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d' % (N, n, K, decoder_mode, Memory, Delta))

    # Load data
    channel = sio.loadmat('./data/data_%d' % N)['input_h']
    rate = sio.loadmat('./data/data_%d' % N)['output_obj']  # this rate is only used to plot figures; it is never used to train DROO

    # scale h up to close to 1 for better training; it is a trick widely adopted in deep learning
    channel = channel * 1000000

    # generate the train and test data sample indices
    # data are split 80:20
    # training data are randomly sampled with duplication if n > total data size
    split_idx = int(.8 * len(channel))
    num_test = min(len(channel) - split_idx, n - int(.8 * n))  # test data size

    mem = MemoryDNN(net=[N, 120, 80, N],
                    learning_rate=0.01,
                    training_interval=10,
                    batch_size=128,
                    memory_size=Memory)

    start_time = time.time()

    rate_his = []
    rate_his_ratio = []
    mode_his = []
    k_idx_his = []
    K_his = []
    for i in range(n):
        if i % (n // 10) == 0:
            print("%0.1f" % (i / n))
        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        if i < n - num_test:
            # training
            i_idx = i % split_idx
        else:
            # test
            i_idx = i - n + num_test + split_idx
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            r_list.append(bisection(h / 1000000, m)[0])

        # encode the mode with the largest reward
        mem.encode(h, m_list[np.argmax(r_list)])
        # the main code for DROO training ends here

        # the following code stores some metrics of interest for illustration

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of the largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        mode_his.append(m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
    mem.plot_cost()
    plot_rate(rate_his_ratio)

    print("Averaged normalized computation rate:", sum(rate_his_ratio[-num_test:-1]) / num_test)
    print('Total time consumed:%s' % total_time)
    print('Average time per channel:%s' % (total_time / n))

    # save data into txt
    save_to_txt(k_idx_his, "k_idx_his.txt")
    save_to_txt(K_his, "K_his.txt")
    save_to_txt(mem.cost_his, "cost_his.txt")
    save_to_txt(rate_his_ratio, "rate_his_ratio.txt")
    save_to_txt(mode_his, "mode_his.txt")
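One detail in main.py that is easy to miss is the adaptive-K update: every Delta frames, K is reset to one more than the largest candidate index that actually won during the preceding Delta frames (capped at N), so the number of candidate actions shrinks once the DNN converges. Below is a standalone sketch of just that rule, with a fabricated k_idx_his history (in main.py this list records the index of the winning candidate at each time frame):

import numpy as np

# Isolated sketch of the adaptive-K rule in main.py; the k_idx_his values are fabricated.
N = 10                                          # upper bound for K
Delta = 32                                      # update interval
rng = np.random.default_rng(0)
winners = rng.integers(0, 3, size=320)          # pretend the best action is always among the first 3 candidates

K = N
k_idx_his = []
for i, k_idx in enumerate(winners):
    k_idx_his.append(k_idx)
    if i > 0 and i % Delta == 0:
        max_k = max(k_idx_his[-Delta:-1]) + 1   # largest winning index in the last Delta frames, plus 1
        K = min(max_k + 1, N)                   # keep one extra candidate as margin, never exceed N
print("adapted K =", K)                         # far below N once winners concentrate near index 0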
memoryTF2.py
# #################################################################
#  This file contains the main DROO operations, including building the DNN,
#  storing data samples, training the DNN, and generating quantized binary
#  offloading decisions.
#
#  version 1.0 -- January 2020. Written based on Tensorflow 2 by Weijian Pan and
#  Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################

from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

print(tf.__version__)
print(tf.keras.__version__)


# DNN network for memory
class MemoryDNN:
    def __init__(
        self,
        net,
        learning_rate=0.01,
        training_interval=10,
        batch_size=100,
        memory_size=1000,
        output_graph=False
    ):
        self.net = net                                  # the size of the DNN
        self.training_interval = training_interval      # learn every #training_interval steps
        self.lr = learning_rate
        self.batch_size = batch_size
        self.memory_size = memory_size

        # store all binary actions
        self.enumerate_actions = []

        # stored # memory entry
        self.memory_counter = 1

        # store training cost
        self.cost_his = []

        # initialize zero memory [h, m]
        self.memory = np.zeros((self.memory_size, self.net[0] + self.net[-1]))

        # construct memory network
        self._build_net()

    def _build_net(self):
        self.model = keras.Sequential([
            layers.Dense(self.net[1], activation='relu'),     # the first hidden layer
            layers.Dense(self.net[2], activation='relu'),     # the second hidden layer
            layers.Dense(self.net[-1], activation='sigmoid')  # the output layer
        ])

        self.model.compile(optimizer=keras.optimizers.Adam(lr=self.lr),
                           loss=tf.losses.binary_crossentropy,
                           metrics=['accuracy'])

    def remember(self, h, m):
        # replace the old memory with new memory
        idx = self.memory_counter % self.memory_size
        self.memory[idx, :] = np.hstack((h, m))
        self.memory_counter += 1

    def encode(self, h, m):
        # encoding the entry
        self.remember(h, m)
        # train the DNN every training_interval steps
        # if self.memory_counter > self.memory_size / 2 and self.memory_counter % self.training_interval == 0:
        if self.memory_counter % self.training_interval == 0:
            self.learn()

    def learn(self):
        # sample batch memory from all memory
        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
        batch_memory = self.memory[sample_index, :]

        h_train = batch_memory[:, 0:self.net[0]]
        m_train = batch_memory[:, self.net[0]:]

        # print(h_train)          # (128, 10)
        # print(m_train)          # (128, 10)

        # train the DNN
        hist = self.model.fit(h_train, m_train, verbose=0)
        self.cost = hist.history['loss'][0]
        assert self.cost > 0
        self.cost_his.append(self.cost)

    def decode(self, h, k=1, mode='OP'):
        # add a batch dimension before feeding into the model
        h = h[np.newaxis, :]

        m_pred = self.model.predict(h)

        if mode == 'OP':
            return self.knm(m_pred[0], k)
        elif mode == 'KNN':
            return self.knn(m_pred[0], k)
        else:
            print("The action selection must be 'OP' or 'KNN'")

    def knm(self, m, k=1):
        # return k order-preserving binary actions
        m_list = []
        # generate the first binary offloading decision with respect to equation (8)
        m_list.append(1 * (m > 0.5))

        if k > 1:
            # generate the remaining K-1 binary offloading decisions with respect to equation (9)
            m_abs = abs(m - 0.5)
            idx_list = np.argsort(m_abs)[:k - 1]
            for i in range(k - 1):
                if m[idx_list[i]] > 0.5:
                    # set \hat{x}_{t,(k-1)} to 0
                    m_list.append(1 * (m - m[idx_list[i]] > 0))
                else:
                    # set \hat{x}_{t,(k-1)} to 1
                    m_list.append(1 * (m - m[idx_list[i]] >= 0))
        return m_list

    def knn(self, m, k=1):
        # list all 2^N binary offloading actions
        if len(self.enumerate_actions) == 0:
            import itertools
            self.enumerate_actions = np.array(list(map(list, itertools.product([0, 1], repeat=self.net[0]))))

        # the squared 2-norm
        sqd = ((self.enumerate_actions - m) ** 2).sum(1)
        idx = np.argsort(sqd)
        return self.enumerate_actions[idx[:k]]

    def plot_cost(self):
        import matplotlib.pyplot as plt
        plt.plot(np.arange(len(self.cost_his)) * self.training_interval, self.cost_his)
        plt.ylabel('Training Loss')
        plt.xlabel('Time Frames')
        plt.show()
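The core of DROO's action generation is the order-preserving quantizer knm: the first candidate simply thresholds the relaxed DNN output at 0.5, and each further candidate flips one of the entries closest to the 0.5 decision boundary, i.e. the entries the DNN is least sure about. Here is a NumPy-only restatement of the method (no TensorFlow required), run on a made-up relaxed output so the candidate set can be inspected directly:

import numpy as np

# Standalone restatement of MemoryDNN.knm, applied to a fabricated relaxed output m.
def knm(m, k=1):
    m_list = [1 * (m > 0.5)]                     # first action: plain thresholding at 0.5
    if k > 1:
        m_abs = abs(m - 0.5)                     # distance of each entry to the decision boundary
        idx_list = np.argsort(m_abs)[:k - 1]     # the k-1 most uncertain entries
        for i in range(k - 1):
            if m[idx_list[i]] > 0.5:
                m_list.append(1 * (m - m[idx_list[i]] > 0))    # flip this entry to 0
            else:
                m_list.append(1 * (m - m[idx_list[i]] >= 0))   # flip this entry to 1
    return m_list

m = np.array([0.9, 0.52, 0.48, 0.1])             # hypothetical DNN output
for a in knm(m, k=3):
    print(a)
# prints [1 1 0 0], then [1 0 0 0] (entry 1 flipped), then [1 1 1 0] (entry 2 flipped)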
optimization.py
# -*- coding: utf-8 -*-
"""
Created on Tue Jan  9 10:45:26 2018

@author: Administrator
"""

import numpy as np
from scipy import optimize
from scipy.special import lambertw
import scipy.io as sio                     # import scipy.io for .mat file I/O
import time


def plot_gain(gain_his):
    import matplotlib.pyplot as plt
    import pandas as pd
    import matplotlib as mpl

    gain_array = np.asarray(gain_his)
    df = pd.DataFrame(gain_his)

    mpl.style.use('seaborn')
    fig, ax = plt.subplots(figsize=(15, 8))

    rolling_intv = 20

    plt.plot(np.arange(len(gain_array)) + 1, df.rolling(rolling_intv, min_periods=1).mean(), 'b')
    plt.fill_between(np.arange(len(gain_array)) + 1,
                     df.rolling(rolling_intv, min_periods=1).min()[0],
                     df.rolling(rolling_intv, min_periods=1).max()[0],
                     color='b', alpha=0.2)
    plt.ylabel('Gain ratio')
    plt.xlabel('learning steps')
    plt.show()


def bisection(h, M, weights=[]):
    # the bisection algorithm proposed by Suzhi Bi
    # average time to find the optimum: 0.012535839796066284 s

    # parameters and equations
    o = 100
    p = 3
    u = 0.7
    eta1 = ((u * p) ** (1.0 / 3)) / o
    ki = 10 ** -26
    eta2 = u * p / 10 ** -10
    B = 2 * 10 ** 6
    Vu = 1.1
    epsilon = B / (Vu * np.log(2))
    x = []  # a = x[0], and tau_j = x[1:]

    M0 = np.where(M == 0)[0]
    M1 = np.where(M == 1)[0]

    hi = np.array([h[i] for i in M0])
    hj = np.array([h[i] for i in M1])

    if len(weights) == 0:
        # default weights [1, 1.5, 1, 1.5, 1, 1.5, ...]
        weights = [1.5 if i % 2 == 1 else 1 for i in range(len(M))]

    wi = np.array([weights[M0[i]] for i in range(len(M0))])
    wj = np.array([weights[M1[i]] for i in range(len(M1))])

    def sum_rate(x):
        sum1 = sum(wi * eta1 * (hi / ki) ** (1.0 / 3) * x[0] ** (1.0 / 3))
        sum2 = 0
        for i in range(len(M1)):
            sum2 += wj[i] * epsilon * x[i + 1] * np.log(1 + eta2 * hj[i] ** 2 * x[0] / x[i + 1])
        return sum1 + sum2

    def phi(v, j):
        return 1 / (-1 - 1 / (lambertw(-1 / (np.exp(1 + v / wj[j] / epsilon))).real))

    def p1(v):
        p1 = 0
        for j in range(len(M1)):
            p1 += hj[j] ** 2 * phi(v, j)
        return 1 / (1 + p1 * eta2)

    def Q(v):
        sum1 = sum(wi * eta1 * (hi / ki) ** (1.0 / 3)) * p1(v) ** (-2 / 3) / 3
        sum2 = 0
        for j in range(len(M1)):
            sum2 += wj[j] * hj[j] ** 2 / (1 + 1 / phi(v, j))
        return sum1 + sum2 * epsilon * eta2 - v

    def tau(v, j):
        return eta2 * hj[j] ** 2 * p1(v) * phi(v, j)

    # bisection starts here
    delta = 0.005
    UB = 999999999
    LB = 0
    while UB - LB > delta:
        v = (float(UB) + LB) / 2
        if Q(v) > 0:
            LB = v
        else:
            UB = v

    x.append(p1(v))
    for j in range(len(M1)):
        x.append(tau(v, j))

    return sum_rate(x), x[0], x[1:]


def cd_method(h):
    N = len(h)
    M0 = np.random.randint(2, size=N)
    gain0, a, Tj = bisection(h, M0)
    g_list = []
    M_list = []
    while True:
        for j in range(0, N):
            M = np.copy(M0)
            M[j] = (M[j] + 1) % 2
            gain, a, Tj = bisection(h, M)
            g_list.append(gain)
            M_list.append(M)
        g_max = max(g_list)
        if g_max > gain0:
            gain0 = g_max
            M0 = M_list[g_list.index(g_max)]
        else:
            break
    return gain0, M0


if __name__ == "__main__":
    h = np.array([6.06020304235508*10**-6, 1.10331933767028*10**-5, 1.00213540309998*10**-7, 1.21610610942759*10**-6, 1.96138838395145*10**-6, 1.71456339592966*10**-6, 5.24563569673585*10**-6, 5.89530717142197*10**-7, 4.07769429231962*10**-6, 2.88333185798682*10**-6])
    M = np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0])

    # h = np.array([1.00213540309998*10**-7, 1.10331933767028*10**-5, 6.06020304235508*10**-6, 1.21610610942759*10**-6, 1.96138838395145*10**-6, 1.71456339592966*10**-6, 5.24563569673585*10**-6, 5.89530717142197*10**-7, 4.07769429231962*10**-6, 2.88333185798682*10**-6])
    # M = np.array([0, 0, 1, 0, 1, 0, 0, 0, 0, 0])

    # h = np.array([4.6368924987170947*10**-7, 1.3479411763648968*10**-7, 7.174945246007612*10**-6, 2.5590719803595445*10**-7, 3.3189928740379023*10**-6, 1.2109071327755575*10**-5, 2.394278475886022*10**-6, 2.179121774067472*10**-6, 5.5213902658478367*10**-8, 2.168778154948169*10**-7, 2.053227965874453*10**-6, 7.002952297466865*10**-8, 7.594077851181444*10**-8, 7.904048961975136*10**-7, 8.867218892023474*10**-7, 5.886007653360979*10**-6, 2.3470565740563855*10**-6, 1.387049627074303*10**-7, 3.359475870531776*10**-7, 2.633733784949562*10**-7, 2.189895264149453*10**-6, 1.129177795302099*10**-5, 1.1760290137191366*10**-6, 1.6588656719735275*10**-7, 1.383637788476638*10**-6, 1.4485928387351664*10**-6, 1.4262265958416598*10**-6, 1.1779725004265418*10**-6, 7.738218993031842*10**-7, 4.763534225174186*10**-6])
    # M = np.array([0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # time the average speed of the bisection algorithm
    # repeat = 1
    # M = np.random.randint(2, size=(repeat, len(h)))
    # start_time = time.time()
    # for i in range(repeat):
    #     gain, a, Tj = bisection(h, M[i, :])
    # total_time = time.time() - start_time
    # print('time_cost:%s' % (total_time / repeat))

    gain, a, Tj = bisection(h, M)
    print('y:%s' % gain)
    print('a:%s' % a)
    print('Tj:%s' % Tj)

    # test the CD method: given h, generate the max-gain mode
    gain0, M0 = cd_method(h)
    print('max y:%s' % gain0)
    print(M0)

    # test all data
    K = [10, 20, 30]                # number of users
    N = 1000                        # number of channels
    for k in K:
        # Load data
        channel = sio.loadmat('./data/data_%d' % int(k))['input_h']
        gain = sio.loadmat('./data/data_%d' % int(k))['output_obj']

        start_time = time.time()
        gain_his = []
        gain_his_ratio = []
        mode_his = []
        for i in range(N):
            if i % (N // 10) == 0:
                print("%0.1f" % (i / N))

            i_idx = i
            h = channel[i_idx, :]

            # the CD method
            gain0, M0 = cd_method(h)

            # memorize the largest gain
            gain_his.append(gain0)
            gain_his_ratio.append(gain_his[-1] / gain[i_idx][0])
            mode_his.append(M0)

        total_time = time.time() - start_time
        print('time_cost:%s' % total_time)
        print('average time per channel:%s' % (total_time / N))

        plot_gain(gain_his_ratio)
        print("gain/max ratio: ", sum(gain_his_ratio) / N)
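If you want to poke at the solver without the full DROO pipeline, a minimal usage sketch follows; the channel gains here are fabricated but of the same order of magnitude (around 1e-6) as the test vectors above. Note that bisection expects unscaled gains, which is why main.py divides h by 1,000,000 before calling it:

import numpy as np
from optimization import bisection, cd_method

np.random.seed(0)
h = np.random.uniform(1e-7, 1e-5, size=10)    # fabricated channel gains

# evaluate one fixed offloading mode: 1 means the WD offloads, 0 means local computing
M = np.array([1, 0, 1, 0, 0, 1, 0, 0, 1, 0])
rate, a, tau = bisection(h, M)
print('weighted sum computation rate:', rate)
print('energy broadcasting parameter a:', a)
print('transmit times tau of the offloading WDs:', tau)

# coordinate descent searches over modes by flipping one bit at a time
best_rate, best_M = cd_method(h)
print('CD-optimal mode:', best_M, 'with rate', best_rate)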
demo_alternate_weights.py
# #################################################################
#  Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks
#
#  This file contains a demo evaluating the performance of DROO with alternating-weight WDs. It loads
#  the training samples with the default WDs' weights from ./data/data_10.mat and those with alternated
#  weights from ./data/data_10_WeightsAlternated.mat. The channel gains in both files are the same.
#  However, the optimal offloading mode, resource allocation, and maximum computation rate in
#  'data_10_WeightsAlternated.mat' are recalculated since the WDs' weights are alternated.
#
#  References:
#  [1] Liang Huang, Suzhi Bi, and Ying-Jun Angela Zhang, "Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks," arXiv:1808.01977.
#
#  version 1.0 -- April 2019. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################

import scipy.io as sio                     # import scipy.io for .mat file I/O
import numpy as np                         # import numpy

from memory import MemoryDNN
from optimization import bisection
from main import plot_rate, save_to_txt

import time


def alternate_weights(case_id=0):
    '''
    Alternate the weights of all WDs. Note that the maximum computation rate needs to be
    recomputed by solving (P2) once any WD's weight is changed.
    Input: case_id = 0 for the default weights; case_id = 1 for the alternated weights.
    Output: the alternated weights and the corresponding rate.
    '''
    # set the alternated weights
    weights = [[1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5],
               [1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1]]

    # load the corresponding maximum computation rate
    if case_id == 0:
        # by default, case_id = 0
        rate = sio.loadmat('./data/data_10')['output_obj']
    else:
        # alternated weights for all WDs, case_id = 1
        rate = sio.loadmat('./data/data_10_WeightsAlternated')['output_obj']
    return weights[case_id], rate


if __name__ == "__main__":
    '''
    This demo evaluates DROO with alternating-weight WDs. We evaluate an extreme case by
    alternating the weights of all WDs between 1 and 1.5 at the same time, specifically at
    time frames 6,000 and 8,000.
    '''

    N = 10                       # number of users
    n = 10000                    # number of time frames, <= 10,000
    K = N                        # initialize K = N
    decoder_mode = 'OP'          # the quantization mode: 'OP' (order-preserving) or 'KNN'
    Memory = 1024                # capacity of the memory structure
    Delta = 32                   # update interval for adaptive K

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d' % (N, n, K, decoder_mode, Memory, Delta))

    # Load data
    channel = sio.loadmat('./data/data_%d' % N)['input_h']
    rate = sio.loadmat('./data/data_%d' % N)['output_obj']

    # scale h up to close to 1 for better training; it is a trick widely adopted in deep learning
    channel = channel * 1000000

    # generate the train and test data sample indices
    # data are split 80:20
    # training data are randomly sampled with duplication if n > total data size
    split_idx = int(.8 * len(channel))
    num_test = min(len(channel) - split_idx, n - int(.8 * n))  # test data size

    mem = MemoryDNN(net=[N, 120, 80, N],
                    learning_rate=0.01,
                    training_interval=10,
                    batch_size=128,
                    memory_size=Memory)

    start_time = time.time()

    rate_his = []
    rate_his_ratio = []
    mode_his = []
    k_idx_his = []
    K_his = []
    h = channel[0, :]

    # initialize the weights by setting case_id = 0
    weight, rate = alternate_weights(0)
    print("WD weights at time frame %d:" % (0), weight)

    for i in range(n):
        # alternate the weights of all WDs at time frames 0.6*n and 0.8*n
        if i == 0.6 * n:
            weight, rate = alternate_weights(1)
            print("WD weights at time frame %d:" % (i), weight)
        if i == 0.8 * n:
            weight, rate = alternate_weights(0)
            print("WD weights at time frame %d:" % (i), weight)

        if i % (n // 10) == 0:
            print("%0.1f" % (i / n))
        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        i_idx = i
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # the current weights are passed to the resource allocation solver
            r_list.append(bisection(h / 1000000, m, weight)[0])

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of the largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        # save the mode with the largest reward
        mode_his.append(m_list[np.argmax(r_list)])

        # if i < 0.6*n:
        # encode the mode with the largest reward
        mem.encode(h, m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
    mem.plot_cost()
    plot_rate(rate_his_ratio)

    print("Averaged normalized computation rate:", sum(rate_his_ratio[-num_test:-1]) / num_test)
    print('Total time consumed:%s' % total_time)
    print('Average time per channel:%s' % (total_time / n))

    # save data into txt
    save_to_txt(k_idx_his, "k_idx_his.txt")
    save_to_txt(K_his, "K_his.txt")
    save_to_txt(mem.cost_his, "cost_his.txt")
    save_to_txt(rate_his_ratio, "rate_his_ratio.txt")
    save_to_txt(mode_his, "mode_his.txt")
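Since the only thing this demo changes is the weights argument passed to bisection, its effect can be reproduced in isolation. The sketch below (again with fabricated channel gains) evaluates the same channel and mode under the two weight vectors returned by alternate_weights; the weighted objectives differ even though the physical setup is identical:

import numpy as np
from optimization import bisection

np.random.seed(1)
h = np.random.uniform(1e-7, 1e-5, size=10)   # fabricated channel gains
M = np.array([1, 0, 0, 1, 0, 0, 1, 0, 0, 1])

# the two weight cases used by alternate_weights()
rate_default, _, _ = bisection(h, M, [1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5])
rate_swapped, _, _ = bisection(h, M, [1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1])
print(rate_default, rate_swapped)            # same channels, different weighted objectives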
demo_on_off.py
# #################################################################
#  Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks
#
#  This file contains a demo evaluating the performance of DROO by randomly turning some WDs on/off.
#  It loads the training samples from ./data/data_#.mat, where # denotes the number of active WDs in
#  the MEC network. Note that the maximum computation rate needs to be recomputed by solving (P2)
#  once a WD is turned off/on.
#
#  References:
#  [1] Liang Huang, Suzhi Bi, and Ying-Jun Angela Zhang, "Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks," submitted to IEEE Journal on Selected Areas in Communications.
#
#  version 1.0 -- April 2019. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################

import scipy.io as sio                     # import scipy.io for .mat file I/O
import numpy as np                         # import numpy

from memory import MemoryDNN
from optimization import bisection
from main import plot_rate, save_to_txt

import time


def WD_off(channel, N_active, N):
    # turn off one WD
    if N_active > 5:  # currently we support turning off at most half of the WDs
        N_active = N_active - 1
        # set the channel of the WD with the greatest index close to 0;
        # since all channels in each time frame are randomly generated, turning off the
        # highest-indexed WD is without loss of generality
        channel[:, N_active] = channel[:, N_active] / 1000000  # a programming trick, so that the channel gain can be recovered once the WD is turned on again
        print("    The %dth WD is turned off." % (N_active + 1))

    # update the expected maximum computation rate
    rate = sio.loadmat('./data/data_%d' % N_active)['output_obj']
    return channel, rate, N_active


def WD_on(channel, N_active, N):
    # turn on one WD
    if N_active < N:
        N_active = N_active + 1
        # recover the (N_active-1)th channel
        channel[:, N_active - 1] = channel[:, N_active - 1] * 1000000
        print("    The %dth WD is turned on." % (N_active))

    # update the expected maximum computation rate
    rate = sio.loadmat('./data/data_%d' % N_active)['output_obj']
    return channel, rate, N_active


if __name__ == "__main__":
    '''
    This demo evaluates DROO for MEC networks where WDs can occasionally be turned off/on.
    After DROO converges, we randomly turn off one WD at each of the time frames 6,000,
    6,500, 7,000, and 7,500, and then turn WDs back on at time frames 8,000, 8,500, and
    9,000 (two are turned on at 9,000). At time frame 9,500, we randomly turn off two WDs,
    resulting in an MEC network with 8 active WDs.
    '''

    N = 10                       # number of users
    N_active = N                 # number of active users
    N_off = 0                    # number of off users
    n = 10000                    # number of time frames, <= 10,000
    K = N                        # initialize K = N
    decoder_mode = 'OP'          # the quantization mode: 'OP' (order-preserving) or 'KNN'
    Memory = 1024                # capacity of the memory structure
    Delta = 32                   # update interval for adaptive K

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d' % (N, n, K, decoder_mode, Memory, Delta))

    # Load data
    channel = sio.loadmat('./data/data_%d' % N)['input_h']
    rate = sio.loadmat('./data/data_%d' % N)['output_obj']

    # scale h up to close to 1 for better training; it is a trick widely adopted in deep learning
    channel = channel * 1000000
    channel_bak = channel.copy()

    # generate the train and test data sample indices
    # data are split 80:20
    # training data are randomly sampled with duplication if n > total data size
    split_idx = int(.8 * len(channel))
    num_test = min(len(channel) - split_idx, n - int(.8 * n))  # test data size

    mem = MemoryDNN(net=[N, 120, 80, N],
                    learning_rate=0.01,
                    training_interval=10,
                    batch_size=128,
                    memory_size=Memory)

    start_time = time.time()

    rate_his = []
    rate_his_ratio = []
    mode_his = []
    k_idx_his = []
    K_his = []
    h = channel[0, :]

    for i in range(n):
        # for a dynamic number of WDs
        if i == 0.6 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.65 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.7 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.75 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.8 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.85 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.9 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.95 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
            channel, rate, N_active = WD_off(channel, N_active, N)

        if i % (n // 10) == 0:
            print("%0.1f" % (i / n))
        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        i_idx = i
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # only active users are used to compute the rate
            r_list.append(bisection(h[0:N_active] / 1000000, m[0:N_active])[0])

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of the largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        # save the mode with the largest reward
        mode_his.append(m_list[np.argmax(r_list)])

        # if i < 0.6*n:
        # encode the mode with the largest reward
        mem.encode(h, m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
    mem.plot_cost()
    plot_rate(rate_his_ratio)

    print("Averaged normalized computation rate:", sum(rate_his_ratio[-num_test:-1]) / num_test)
    print('Total time consumed:%s' % total_time)
    print('Average time per channel:%s' % (total_time / n))

    # save data into txt
    save_to_txt(k_idx_his, "k_idx_his.txt")
    save_to_txt(K_his, "K_his.txt")
    save_to_txt(mem.cost_his, "cost_his.txt")
    save_to_txt(rate_his_ratio, "rate_his_ratio.txt")
    save_to_txt(mode_his, "mode_his.txt")
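One implementation detail worth calling out in WD_off/WD_on is the "programming trick": an inactive WD's channel column is divided by 1,000,000 rather than zeroed, so multiplying by the same constant later restores the gains (up to floating-point rounding) when the WD comes back. A tiny self-contained check with fabricated gains:

import numpy as np

channel = np.random.uniform(0.1, 10.0, size=(4, 3))   # fabricated, already-scaled gains
backup = channel.copy()

channel[:, 2] = channel[:, 2] / 1000000               # "turn off" the 3rd WD: its gain becomes negligible
channel[:, 2] = channel[:, 2] * 1000000               # "turn on" the 3rd WD again
print(np.allclose(channel, backup))                   # True: the original gains are recovered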
Finally
That wraps up this walkthrough of the DROO source code for "Deep Reinforcement Learning for Online Computation Offloading". For the underlying theory, see the references in the file headers above.