第九章 9-7 基于模型的排序
小简老师,这一节讲得比较粗糙,代码也不全,给出的源码中有些函数不知道是在哪里定义的,麻烦补充一下,谢谢!
下面这两个函数:
axpy
val scanStr = TableMapReduceUtil.convertScanToString(scan)
这一节 LR 排序的讲解跳跃很大,感觉很难串起来;也没有讲 GBDT 的排序,因此也没有讲 LR + GBDT 的排序,而课程介绍里是有这部分内容的。这一节很重要,麻烦小简老师补充一下,谢谢!
/**
 * Computes the logistic-regression gradient contribution and loss for one example.
 *
 * The gradient contribution is handed to `axpy` together with `cumGradient`
 * (presumably a BLAS-style in-place `cumGradient += multiplier * data` -- the helper is
 * defined outside this snippet, confirm against its definition).
 *
 * @param data        feature vector of the example
 * @param label       label of the example; only `label > 0` vs. not is distinguished for the
 *                    loss (presumably labels are 0.0 / 1.0 -- verify against the caller)
 * @param weights     current model weights
 * @param cumGradient gradient accumulator passed to `axpy`
 * @return the logistic loss of this example
 */
def compute(data: Vector, label: Double, weights: Vector, cumGradient: Vector): Double = {
  // margin = -(w . x), so the predicted probability is 1 / (1 + exp(margin)).
  val margin = -1.0 * dot(data, weights)
  val multiplier = (1.0 / (1.0 + math.exp(margin))) - label
  axpy(multiplier, data, cumGradient)

  // The loss needs log(1 + exp(margin)); plain log1p(margin) would be log(1 + margin),
  // which is a different quantity and NaN for margin < -1.
  // For positive margins, rewrite as margin + log(1 + exp(-margin)) so exp cannot overflow.
  val logOnePlusExpMargin =
    if (margin > 0) margin + math.log1p(math.exp(-margin))
    else math.log1p(math.exp(margin))

  if (label > 0) logOnePlusExpMargin
  else logOnePlusExpMargin - margin
}
/**
 * Reads one column of an HBase table over a row-key range through Spark.
 *
 * Configures the enclosing scope's `hbaseConfig` with the input table and a serialized
 * `Scan`, builds an RDD from `TableInputFormat` via `sc`, extracts the cell value of
 * `cf:column` from every `Result`, and collects those values to the driver.
 *
 * NOTE(review): the collected values are bound to a local and never returned or used;
 * callers only observe `Unit`.
 *
 * @param tableName name of the HBase table to scan
 * @param cf        column family to read
 * @param column    column qualifier to read
 * @param start     first row-key bound handed to the `Scan` constructor
 * @param end       second row-key bound handed to the `Scan` constructor
 */
def scanData(tableName: String,
             cf: String,
             column: String,
             start: String,
             end: String): Unit = {
  val familyBytes = Bytes.toBytes(cf)
  val qualifierBytes = Bytes.toBytes(column)

  hbaseConfig.set(TableInputFormat.INPUT_TABLE, tableName)

  // Restrict the scan to the requested row range and to the single column cf:column.
  val rangeScan = new Scan(Bytes.toBytes(start), Bytes.toBytes(end))
    .addFamily(familyBytes)
    .addColumn(familyBytes, qualifierBytes)

  // TableInputFormat reads the scan from the configuration in its string-encoded form.
  hbaseConfig.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(rangeScan))

  val tableRows: RDD[(ImmutableBytesWritable, Result)] =
    sc.newAPIHadoopRDD(
      hbaseConfig,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result])

  // collect() triggers the job and brings every cell value back to the driver.
  val cellValues = tableRows
    .map { case (_, row) => row.getValue(familyBytes, qualifierBytes) }
    .collect()
}