我是靠谱客的博主 爱听歌戒指,这篇文章主要介绍Alink在线学习(Online Learning)之Java示例【六】,现在分享给大家,希望可以做个参考。

最后,贴出完整代码,感兴趣的读者可以运行实验。

注意,由于示例中需要演示中间结果,代码里有很多打印或执行的方法调用;我已将调用这些方法的代码注释掉,读者可以自行取消其中某些注释,查看运行效果。

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package com.alibaba.alink; import com.alibaba.alink.operator.batch.BatchOperator; import com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp; import com.alibaba.alink.operator.batch.source.CsvSourceBatchOp; import com.alibaba.alink.operator.stream.StreamOperator; import com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp; import com.alibaba.alink.operator.stream.dataproc.SplitStreamOp; import com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp; import com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp; import com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp; import com.alibaba.alink.operator.stream.source.CsvSourceStreamOp; import com.alibaba.alink.pipeline.Pipeline; import com.alibaba.alink.pipeline.PipelineModel; import com.alibaba.alink.pipeline.dataproc.StandardScaler; import com.alibaba.alink.pipeline.feature.FeatureHasher; public class FTRLExample { public static void main(String[] args) throws Exception { //new TextSourceBatchOp() // .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv") // .firstN(10) // .print(); String schemaStr = "id string, click string, dt string, C1 string, banner_pos int, site_id string, site_domain string, " + "site_category string, app_id string, app_domain string, app_category string, device_id string, " + "device_ip string, device_model string, device_type string, device_conn_type string, C14 int, C15 int, " + "C16 int, C17 int, C18 int, C19 int, C20 int, C21 int"; CsvSourceBatchOp trainBatchData = new CsvSourceBatchOp() .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv") .setSchemaStr(schemaStr); //trainBatchData.firstN(10).print(); String labelColName = "click"; String[] selectedColNames = new String[] { "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type", "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21", 
"site_id", "site_domain", "device_id", "device_model"}; String[] categoryColNames = new String[] { "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type", "site_id", "site_domain", "device_id", "device_model"}; String[] numericalColNames = new String[] { "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21"}; // result column name of feature enginerring String vecColName = "vec"; int numHashFeatures = 30000; // setup feature enginerring pipeline Pipeline feature_pipeline = new Pipeline() .add( new StandardScaler() .setSelectedCols(numericalColNames) ) .add( new FeatureHasher() .setSelectedCols(selectedColNames) .setCategoricalCols(categoryColNames) .setOutputCol(vecColName) .setNumFeatures(numHashFeatures) ); // fit and save feature pipeline model String FEATURE_PIPELINE_MODEL_FILE = "/Users/yangxu/alink/data/temp/feature_pipe_model.csv"; //feature_pipeline.fit(trainBatchData).save(FEATURE_PIPELINE_MODEL_FILE); // //BatchOperator.execute(); // prepare stream train data CsvSourceStreamOp data = new CsvSourceStreamOp() .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv") .setSchemaStr(schemaStr) .setIgnoreFirstLine(true); // split stream to train and eval data SplitStreamOp spliter = new SplitStreamOp().setFraction(0.5).linkFrom(data); StreamOperator train_stream_data = spliter; StreamOperator test_stream_data = spliter.getSideOutput(0); // load pipeline model PipelineModel feature_pipelineModel = PipelineModel.load(FEATURE_PIPELINE_MODEL_FILE); // train initial batch model LogisticRegressionTrainBatchOp lr = new LogisticRegressionTrainBatchOp() .setVectorCol(vecColName) .setLabelCol(labelColName) .setWithIntercept(true) .setMaxIter(10); BatchOperator initModel = feature_pipelineModel.transform(trainBatchData).link(lr); // ftrl train FtrlTrainStreamOp model = new FtrlTrainStreamOp(initModel) .setVectorCol(vecColName) .setLabelCol(labelColName) .setWithIntercept(true) 
.setAlpha(0.1) .setBeta(0.1) .setL1(0.01) .setL2(0.01) .setTimeInterval(10) .setVectorSize(numHashFeatures) .linkFrom(feature_pipelineModel.transform(train_stream_data)); // ftrl predict FtrlPredictStreamOp predResult = new FtrlPredictStreamOp(initModel) .setVectorCol(vecColName) .setPredictionCol("pred") .setReservedCols(new String[] {labelColName}) .setPredictionDetailCol("details") .linkFrom(model, feature_pipelineModel.transform(test_stream_data)); //predResult.sample(0.0001).print(); // //StreamOperator.execute(); // ftrl eval predResult.link( new EvalBinaryClassStreamOp() .setLabelCol(labelColName) .setPredictionCol("pred") .setPredictionDetailCol("details") .setTimeInterval(10) ).link( new JsonValueStreamOp() .setSelectedCol("Data") .setReservedCols(new String[] {"Statistics"}) .setOutputCols(new String[] {"Accuracy", "AUC", "ConfusionMatrix"}) .setJsonPath(new String[] {"$.Accuracy", "$.AUC", "$.ConfusionMatrix"}) ) .print(); //StreamOperator.execute(); } }

最后

以上就是爱听歌戒指最近收集整理的关于Alink在线学习(Online Learning)之Java示例【六】的全部内容,更多与Alink在线学习(Online Learning)相关的内容,请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(53)

评论列表共有 0 条评论

立即
投稿
返回
顶部