See the "pom.xml" file below; it lists the dependencies required to build and run the Spark-SQL sample application.

To give Spark SQL access to the Hive metastore, enable Hive support when building the SparkSession:
SparkSession.builder().appName("Spark SQL-Hive").config(sparkConf).enableHiveSupport().getOrCreate();
List the Hive databases, and the tables of the "test_db" database:
sparkSession.catalog().listDatabases().show(false);
sparkSession.catalog().listTables("test_db").show(false);

Select the "test_db" database, insert a row into the "test_table_1" table, then query the table:
sparkSession.sqlContext().sql("use test_db");
sparkSession.sql("insert into table test_table_1 select t.* from (select 1, 'a') t");
sparkSession.sql("select * from test_table_1").show();
sparkConf.setMaster("local");
sparkConf.setMaster("spark://localhost:7077");
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.mtitek.spark</groupId>
    <artifactId>sparksql-hive</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-catalyst_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
    </dependencies>
</project>
import org.apache.spark.SparkConf;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class Test {
    public static void main(String[] args) throws AnalysisException {
        final SparkConf sparkConf = new SparkConf();

        sparkConf.setMaster("local");
        // sparkConf.setMaster("spark://localhost:7077");

        sparkConf.set("hive.metastore.uris", "thrift://localhost:9083");
        // sparkConf.set("hive.exec.scratchdir", "/tmp/a-folder-that-the-current-user-has-permission-to-write-in");

        final SparkSession sparkSession = SparkSession.builder().appName("Spark SQL-Hive").config(sparkConf)
                .enableHiveSupport().getOrCreate();

        sparkSession.catalog().listDatabases().show(false);
        sparkSession.catalog().listTables("test_db").show(false);

        sparkSession.sqlContext().sql("use test_db");
        sparkSession.sql("insert into table test_table_1 select t.* from (select 1, 'a') t");

        Dataset<Row> tabledata = sparkSession.sql("select * from test_table_1");
        tabledata.show();
    }
}
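One way to try the class against the standalone cluster is to build it with Maven and launch it with spark-submit. A minimal sketch, assuming spark-submit is on the PATH, the jar name produced by the pom.xml above, and that the hardcoded setMaster(...) call is removed first (values set directly on the SparkConf take precedence over spark-submit options):

# build the application jar (target/sparksql-hive-0.0.1-SNAPSHOT.jar)
mvn clean package

# submit the application to the standalone cluster
spark-submit --class Test --master spark://localhost:7077 target/sparksql-hive-0.0.1-SNAPSHOT.jar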
+-------+---------------------+-----------------------------------------------+
|name   |description          |locationUri                                    |
+-------+---------------------+-----------------------------------------------+
|default|Default Hive database|hdfs://localhost:8020/hive/warehouse           |
|test_db|                     |hdfs://localhost:8020/hive/warehouse/test_db.db|
+-------+---------------------+-----------------------------------------------+

+------------+--------+-----------+---------+-----------+
|name        |database|description|tableType|isTemporary|
+------------+--------+-----------+---------+-----------+
|test_table_1|test_db |null       |EXTERNAL |false      |
|test_table_2|test_db |null       |EXTERNAL |false      |
+------------+--------+-----------+---------+-----------+

+------+------+
|field1|field2|
+------+------+
|     1|     a|
+------+------+
You may get the following error when running the Spark SQL application:
java.lang.RuntimeException: The root scratch dir: /tmp/hive on HDFS should be writable. Current permissions are: rwxrwxr-x

The property "hive.exec.scratchdir" has the value "/tmp/hive" by default. The folder "/tmp/hive" may be owned by another user's processes running on the same host where you are running the Spark SQL application.

One option is to change the permissions of the folder "/tmp/hive":
sudo chmod -R 777 /tmp/hive/
").hive.exec.scratchdir
" and initialize its values with another directory that the user running the Spark SQL application can write to:
sparkConf.set("hive.exec.scratchdir", "/tmp/a-folder-that-the-current-user-has-permission-to-write-in");
You may also get the following error when the user running the Spark SQL application does not have write permissions on the HDFS folder of the Hive table:
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=mtitek, access=WRITE, inode="/test_db/test_table_1":hive:hadoop:drwxr-xr-x

One option is to change the permissions of the HDFS folder "/test_db/test_table_1":
hdfs dfs -chmod -R 777 /test_db/test_table_1
").hdfs dfs -setfacl -R -m user:mtitek:rwx hdfs://localhost:8020/test_db/