hadoop - Getting a NullPointerException when running the Nutch 2.2 crawler with HBase
When I run the Nutch command: ~/nutch/runtime/deploy$ bin/nutch crawl urls -dir /user/dlequoc/urls -depth 2 -topn 5, I get the following exception:
=======================================================
13/08/26 16:30:15 info mapred.jobclient: map 100% reduce 0% 13/08/26 16:30:29 info mapred.jobclient: task id : attempt_201308261546_0004_r_000000_0, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:32 info mapred.jobclient: task id : attempt_201308261546_0004_r_000001_0, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:32 info mapred.jobclient: task id : attempt_201308261546_0004_r_000005_0, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ 
org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:32 info mapred.jobclient: task id : attempt_201308261546_0004_r_000004_0, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:32 info mapred.jobclient: task id : attempt_201308261546_0004_r_000002_0, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:32 info mapred.jobclient: task id : attempt_201308261546_0004_r_000003_0, status : failed 
java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:44 info mapred.jobclient: task id : attempt_201308261546_0004_r_000001_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:47 info mapred.jobclient: task id : attempt_201308261546_0004_r_000000_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ 
javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:47 info mapred.jobclient: task id : attempt_201308261546_0004_r_000005_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:47 info mapred.jobclient: task id : attempt_201308261546_0004_r_000002_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:47 info mapred.jobclient: task id : attempt_201308261546_0004_r_000004_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ 
org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:47 info mapred.jobclient: task id : attempt_201308261546_0004_r_000003_1, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:59 info mapred.jobclient: task id : attempt_201308261546_0004_r_000000_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ 
org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:59 info mapred.jobclient: task id : attempt_201308261546_0004_r_000002_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:30:59 info mapred.jobclient: task id : attempt_201308261546_0004_r_000001_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:31:02 info mapred.jobclient: task id : attempt_201308261546_0004_r_000005_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ 
org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:31:05 info mapred.jobclient: task id : attempt_201308261546_0004_r_000003_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:31:11 info mapred.jobclient: task id : attempt_201308261546_0004_r_000004_2, status : failed java.lang.nullpointerexception @ org.apache.avro.util.utf8.(utf8.java:37) @ org.apache.nutch.crawl.generatorreducer.setup(generatorreducer.java:100) @ org.apache.hadoop.mapreduce.reducer.run(reducer.java:174) @ org.apache.hadoop.mapred.reducetask.runnewreducer(reducetask.java:649) @ org.apache.hadoop.mapred.reducetask.run(reducetask.java:417) @ org.apache.hadoop.mapred.child$4.run(child.java:255) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:396) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1121) @ org.apache.hadoop.mapred.child.main(child.java:249) 13/08/26 16:31:20 info mapred.jobclient: job complete: job_201308261546_0004 13/08/26 16:31:20 info 
mapred.jobclient: counters: 24 13/08/26 16:31:20 info mapred.jobclient: job counters 13/08/26 16:31:20 info mapred.jobclient: launched reduce tasks=23 13/08/26 16:31:20 info mapred.jobclient: slots_millis_maps=113452 13/08/26 16:31:20 info mapred.jobclient: total time spent reduces waiting after reserving slots (ms)=0 13/08/26 16:31:20 info mapred.jobclient:
total time spent maps waiting after reserving slots (ms)=0 13/08/26 16:31:20 info mapred.jobclient: rack-local map tasks=1 13/08/26 16:31:20 info mapred.jobclient: launched map tasks=1 13/08/26 16:31:20 info mapred.jobclient: failed reduce tasks=1 13/08/26 16:31:20 info mapred.jobclient:
slots_millis_reduces=268210 13/08/26 16:31:20 info mapred.jobclient:
filesystemcounters 13/08/26 16:31:20 info mapred.jobclient:
file_bytes_read=25743276 13/08/26 16:31:20 info mapred.jobclient:
hdfs_bytes_read=704 13/08/26 16:31:20 info mapred.jobclient:
file_bytes_written=51473783 13/08/26 16:31:20 info mapred.jobclient:
file input format counters 13/08/26 16:31:20 info mapred.jobclient:
bytes read=0 13/08/26 16:31:20 info mapred.jobclient: map-reduce framework 13/08/26 16:31:20 info mapred.jobclient: map output materialized bytes=25720344 13/08/26 16:31:20 info mapred.jobclient:
combine output records=0 13/08/26 16:31:20 info mapred.jobclient:
map input records=333988 13/08/26 16:31:20 info mapred.jobclient:
physical memory (bytes) snapshot=449036288 13/08/26 16:31:20 info mapred.jobclient: spilled records=667976 13/08/26 16:31:20 info mapred.jobclient: map output bytes=25052332 13/08/26 16:31:20 info mapred.jobclient: cpu time spent (ms)=81870 13/08/26 16:31:20 info mapred.jobclient: total committed heap usage (bytes)=208011264 13/08/26 16:31:20 info mapred.jobclient: virtual memory (bytes) snapshot=740638720 13/08/26 16:31:20 info mapred.jobclient:
combine input records=0 13/08/26 16:31:20 info mapred.jobclient:
map output records=333988 13/08/26 16:31:20 info mapred.jobclient:
split_raw_bytes=704 exception in thread "main" java.lang.runtimeexception: job failed: name=generate: null, jobid=job_201308261546_0004 @ org.apache.nutch.util.nutchjob.waitforcompletion(nutchjob.java:54) @ org.apache.nutch.crawl.generatorjob.run(generatorjob.java:199) @ org.apache.nutch.crawl.crawler.runtool(crawler.java:68) @ org.apache.nutch.crawl.crawler.run(crawler.java:152) @ org.apache.nutch.crawl.crawler.run(crawler.java:250) @ org.apache.hadoop.util.toolrunner.run(toolrunner.java:65) @ org.apache.nutch.crawl.crawler.main(crawler.java:257) @ sun.reflect.nativemethodaccessorimpl.invoke0(native method) @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:39) @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:25) @ java.lang.reflect.method.invoke(method.java:597) @ org.apache.hadoop.util.runjar.main(runjar.java:156)
Could you please help? Thanks!
The command /bin/nutch crawl is deprecated in Nutch 2.x.
Use /bin/crawl instead.
Comments
Post a Comment