// `when` lives in org.apache.spark.sql.functions; without this import the
// compiler reports that `when` cannot be resolved.
import org.apache.spark.sql.functions.when

// Re-encode the "Sex" column: "female" -> 0, "male" -> 1.
// No `otherwise` branch is supplied, so (per the Spark API docs) rows that
// match neither condition end up as null.
val tmp1 = trainDF1.withColumn(
  "Sex",
  when(trainDF1("Sex") === "female", 0).when(trainDF1("Sex") === "male", 1)
)
我用 IDEA 开发时,想把 Sex 列中的性别用 0 和 1 标识,但是编译器提示无法识别 when,这是什么情况呢?
API 文档中的例子也不行:
// Scala:
// The example only compiles when `when` is in scope; it is defined in
// org.apache.spark.sql.functions and must be imported explicitly.
import org.apache.spark.sql.functions.when

// Map "male" -> 0, "female" -> 1, and every other value -> 2.
people.select(
  when(people("gender") === "male", 0)
    .when(people("gender") === "female", 1)
    .otherwise(2)
)
package com.imooc.ml

import org.apache.spark.sql.SparkSession
// `when` is a member of org.apache.spark.sql.functions; it is not brought in by
// the SparkSession import, so it has to be imported explicitly.
import org.apache.spark.sql.functions.when

/**
 * Created by bacon on 2017/7/21.
 *
 * Loads the Titanic training CSV into a DataFrame, keeps the columns of
 * interest, and re-encodes the "Sex" column as an integer (female = 0, male = 1).
 */
object Titanic {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Presumably needed so Hadoop can locate winutils on Windows — confirm for your machine.
    System.setProperty("hadoop.home.dir", "C:\\Users\\bacon\\winutils")

    val spark = SparkSession
      .builder()
      .appName("SparkSession")
      .master("local[2]")
      .getOrCreate()

    // Stop the session even if reading or transforming the data throws.
    try {
      val dataDF = spark.read
        .option("header", "true")
        .option("inferSchema", "true")
        .csv("D:\\learn\\BD\\ML\\data\\Titanic\\train.csv")

      val trainDF1 = dataDF.select(
        "Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"
      )

      // Encode female as 0 and male as 1. There is no `otherwise` branch, so
      // (per the Spark API docs) any other value in "Sex" becomes null.
      val tmp1 = trainDF1.withColumn(
        "Sex",
        when(trainDF1("Sex") === "female", 0).when(trainDF1("Sex") === "male", 1)
      )
    } finally {
      spark.stop()
    }
  }
}