#!/usr/bin/env bash
#
# Launches a Hadoop Streaming job that uses map.py as the mapper.
#
# Steps:
#   1. Remove any previous output directory (the job refuses to start if the
#      output path already exists).
#   2. Submit the streaming job, shipping map.py (resolved relative to the
#      current working directory) to the cluster via -files.
#
# Exit status: that of the yarn job, or non-zero if an earlier step fails.

set +x            # keep command tracing off
set -euo pipefail # abort on errors, unset variables and pipeline failures

HADOOP_STREAMING_JAR=/usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming.jar
INPUT_PATH=/data/ids_part
# NOTE(review): "~" is expanded by the local shell at assignment time, so the
# job output path is the local user's home directory path — confirm that this
# is the intended location on the cluster file system.
OUT_PATH=~/stdout/
readonly HADOOP_STREAMING_JAR INPUT_PATH OUT_PATH

# -f: do not fail when the output directory does not exist yet (first run).
hdfs dfs -rm -r -f -skipTrash "${OUT_PATH}"

# Generic options (-files) must precede the streaming-specific options.
# Every line except the last ends with a backslash so that the whole
# invocation is a single command.
yarn jar "${HADOOP_STREAMING_JAR}" \
  -files map.py \
  -mapper 'python3 map.py' \
  -input "${INPUT_PATH}" \
  -output "${OUT_PATH}"