请稍等 ...
×

采纳答案成功!

向帮助你的同学说点啥吧!感谢那些助人为乐的人

虽然数据导出来了,但是报错了,之前是正常的,加了saveAsTextFile后就报错了

17/07/14 11:07:58 ERROR Utils: Aborting task

java.lang.ArrayIndexOutOfBoundsException: 599

at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2312)

at java.util.Calendar.setTimeInMillis(Calendar.java:1804)

at java.util.Calendar.setTime(Calendar.java:1770)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:943)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:936)

at java.text.DateFormat.format(DateFormat.java:345)

at com.imooc.spark.DataUtils$.parse(DataUtils.scala:20)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:27)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:18)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply$mcV$sp(PairRDDFunctions.scala:1211)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1218)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)

at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)

at org.apache.spark.scheduler.Task.run(Task.scala:99)

at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

at java.lang.Thread.run(Thread.java:748)

17/07/14 11:07:58 ERROR Executor: Exception in task 1.0 in stage 0.0 (TID 1)

java.lang.ArrayIndexOutOfBoundsException: 599

at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2312)

at java.util.Calendar.setTimeInMillis(Calendar.java:1804)

at java.util.Calendar.setTime(Calendar.java:1770)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:943)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:936)

at java.text.DateFormat.format(DateFormat.java:345)

at com.imooc.spark.DataUtils$.parse(DataUtils.scala:20)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:27)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:18)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply$mcV$sp(PairRDDFunctions.scala:1211)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1218)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)

at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)

at org.apache.spark.scheduler.Task.run(Task.scala:99)

at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

at java.lang.Thread.run(Thread.java:748)

17/07/14 11:07:58 WARN TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): java.lang.ArrayIndexOutOfBoundsException: 599

at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2312)

at java.util.Calendar.setTimeInMillis(Calendar.java:1804)

at java.util.Calendar.setTime(Calendar.java:1770)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:943)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:936)

at java.text.DateFormat.format(DateFormat.java:345)

at com.imooc.spark.DataUtils$.parse(DataUtils.scala:20)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:27)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:18)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply$mcV$sp(PairRDDFunctions.scala:1211)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1218)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)

at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)

at org.apache.spark.scheduler.Task.run(Task.scala:99)

at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

at java.lang.Thread.run(Thread.java:748)

17/07/14 11:07:58 INFO DAGScheduler: ResultStage 0 (saveAsTextFile at SparkStatFormatJob.scala:28) failed in 1.158 s due to Job aborted due to stage failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): java.lang.ArrayIndexOutOfBoundsException: 599

at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)

at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2312)

at java.util.Calendar.setTimeInMillis(Calendar.java:1804)

at java.util.Calendar.setTime(Calendar.java:1770)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:943)

at java.text.SimpleDateFormat.format(SimpleDateFormat.java:936)

at java.text.DateFormat.format(DateFormat.java:345)

at com.imooc.spark.DataUtils$.parse(DataUtils.scala:20)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:27)

at com.imooc.spark.SparkStatFormatJob$$anonfun$main$1.apply(SparkStatFormatJob.scala:18)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply$mcV$sp(PairRDDFunctions.scala:1211)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1210)

at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1218)

at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)

at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)

at org.apache.spark.scheduler.Task.run(Task.scala:99)

at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

at java.lang.Thread.run(Thread.java:748)


正在回答

2回答

在代码中添加日志输出,定位到哪一行数据有问题先

0 回复 有任何疑惑可以回复我~
  • 提问者 进击的大黄鸭 #1
    继续往下看了,换成FastDateFormat.getInstance就不报错了
    回复 有任何疑惑可以回复我~ 2017-07-14 11:33:48
  • 提问者 进击的大黄鸭 #2
    非常感谢!
    回复 有任何疑惑可以回复我~ 2017-07-14 11:37:39
提问者 进击的大黄鸭 2017-07-14 11:37:18

在windows本地的话貌似要设置hadoop的环境变量?不然会报空指针错误:java.lang.NullPointerException 设置方法看这篇文章 http://blog.csdn.net/kimyoungvon/article/details/51308651 设置好了就会正常输入output 我是这样的 有类似问题的同学可以借鉴

2 回复 有任何疑惑可以回复我~
问题已解决,确定采纳
还有疑问,暂不采纳
意见反馈 帮助中心 APP下载
官方微信