为方便以 Streaming 方式使用 Hadoop，可以使用如下脚本 map_reduce.sh：
--------------------------------------
#!/bin/bash
#
# map_reduce.sh - thin wrapper that submits a Hadoop Streaming job.
#
# Usage:
#   map_reduce.sh <input> <output> <mapper> [reducer] [options...]
#
# Arguments:
#   input    value passed to hadoop's -input
#   output   value passed to hadoop's -output
#   mapper   command passed to -mapper
#   reducer  command passed to -reducer; when omitted or empty, the literal
#            NONE is passed instead
#   options  extra arguments appended to the hadoop command line. Each one is
#            split on whitespace, so a single quoted string such as
#            "-jobconf a=b -verbose" keeps working as it always did.
#
# Environment:
#   HADOOP_HOME           required; root of the Hadoop installation
#   HADOOP_STREAMING_JAR  optional; overrides the default streaming jar
#                         ($HADOOP_HOME/contrib/streaming/hadoop-0.18.1-streaming.jar)
#
# Exit status:
#   1 on a usage or environment error, otherwise the status of the hadoop
#   command itself.

# Print the usage text to stderr.
usage() {
  printf '%s\n' \
    "Usage: map_reduce.sh <input> <output> <mapper> [reducer] [options]" \
    "You must provide at least 3 parameters." >&2
}

# Validate the arguments, assemble the streaming command line and run it.
main() {
  if (( $# < 3 )); then
    usage
    return 1
  fi

  # Without HADOOP_HOME the command path would collapse to /bin/hadoop.
  if [[ -z "${HADOOP_HOME:-}" ]]; then
    printf '%s\n' "map_reduce.sh: HADOOP_HOME is not set." >&2
    return 1
  fi

  local input=$1
  local output=$2
  local mapper=$3
  local reducer=${4:-NONE}
  local default_jar="${HADOOP_HOME}/contrib/streaming/hadoop-0.18.1-streaming.jar"
  local streaming_jar=${HADOOP_STREAMING_JAR:-$default_jar}

  # Collect every argument after the reducer (not only the 5th). Each one is
  # split on whitespace without glob expansion, so a quoted multi-word option
  # string still turns into separate hadoop arguments.
  local -a options=() words=()
  local arg
  for arg in "${@:5}"; do
    words=()
    # -d '' reads the whole argument (newlines included); read reports
    # non-zero at end of input even though the array has been filled.
    read -r -d '' -a words <<<"$arg" || true
    options+=("${words[@]}")
  done

  "${HADOOP_HOME}/bin/hadoop" jar "$streaming_jar" \
    -input "$input" \
    -output "$output" \
    -mapper "$mapper" \
    -reducer "$reducer" \
    -jobconf stream.non.zero.exit.is.failure=false \
    "${options[@]}"
}

main "$@"