docker pull tangchenyang/spark:v1.1

docker run -itd --privileged --name spark \
-p 9870:9870 -p 8088:8088 -p 8042:8042 \
-p 4040:4040 -p 15002:15002 \
-p 10000:10000 \
tangchenyang/spark:v1.1
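The port mappings expose the NameNode web UI (9870), YARN ResourceManager UI (8088), NodeManager UI (8042), Spark UI (4040), Spark Connect (15002), and HiveServer2 (10000). A quick sanity check that the container is up (a sketch; assumes the services have finished starting):

# confirm the container is running
docker ps --filter name=spark
# the NameNode web UI should answer on the mapped port
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9870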
docker exec -it spark bash

# list folders/files
hdfs dfs -ls /
# put file
hdfs dfs -put /root/software/hadoop-3.3.5/README.txt /
# list folders/files
hdfs dfs -ls /
# get file
cd ~
hdfs dfs -get /README.txt .
ls .
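Beyond ls/put/get, a couple of other everyday HDFS commands are handy for checking what just landed (a sketch; the paths assume the README.txt uploaded above):

# print the file straight from HDFS
hdfs dfs -cat /README.txt
# show space used per path, human-readable
hdfs dfs -du -h /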
# upload a test file
hdfs dfs -mkdir /input
hdfs dfs -put /root/software/hadoop-3.3.5/README.txt /input/
hdfs dfs -ls /input
# run the WordCount example
hadoop jar ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.5.jar wordcount /input/ /output
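While the job runs, it can be watched in the ResourceManager UI on the 8088 mapping above; the same information is available from YARN's REST API (a sketch):

# list currently running applications via the RM REST API
curl -s "http://localhost:8088/ws/v1/cluster/apps?states=RUNNING"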
# inspect the output
hdfs dfs -ls /output
hdfs dfs -cat /output/part-r-00000
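Note that MapReduce refuses to write into an existing output directory, so /output has to be removed before re-running the job (a sketch):

# clear the previous output before a re-run
hdfs dfs -rm -r /output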
# while the application is RUNNING
yarn application --list
# kill a RUNNING application
yarn application --kill <application_id>
# after the application has FINISHED
yarn application --list --appStates ALL
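Once an application has finished, its aggregated container logs can be pulled with yarn logs (a sketch; substitute the real application id):

# fetch logs for a finished application
yarn logs -applicationId <application_id>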
beeline -u jdbc:hive2://localhost:10000 -e "CREATE DATABASE test_db"
beeline -u jdbc:hive2://localhost:10000 -e "SHOW DATABASES"

spark-sql -e "SHOW DATABASES"

spark-submit \
--class org.apache.spark.examples.SparkPi \
--master yarn \
--deploy-mode client \
${SPARK_HOME}/examples/jars/spark-examples_2.12-3.5.5.jar \
1000
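The 15002 mapping in the docker run above is the default Spark Connect port, so if the image starts a Spark Connect server (the port mapping suggests it does), a client shell can attach to it remotely; Spark 3.4+ ships the --remote flag for this (a sketch):

# attach a PySpark shell to the Spark Connect server in the container
${SPARK_HOME}/bin/pyspark --remote "sc://localhost:15002"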