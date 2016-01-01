Integrating Amazon Glue with ClickHouse

AWS Glue is a fully managed, serverless data integration service provided by Amazon Web Services (AWS). It simplifies the process of discovering, preparing, and transforming data for analytics, machine learning, and application development.

Although there is no dedicated ClickHouse connector for Glue yet, the official ClickHouse JDBC driver can be used to connect to and integrate with ClickHouse, as in the following Scala Glue job:

import com . amazonaws . services . glue . util . Job

import com . amazonaws . services . glue . util . GlueArgParser

import com . amazonaws . services . glue . GlueContext

import org . apache . spark . SparkContext

import org . apache . spark . sql . SparkSession

import org . apache . spark . sql . DataFrame

import scala . collection . JavaConverters . _

import com . amazonaws . services . glue . log . GlueLogger







/** Glue job that reads a ClickHouse table over JDBC and prints a preview of it. */
object GlueJob {

  /**
   * Job entry point: initialises the Glue job, loads `my_table` from ClickHouse
   * through the JDBC driver, shows the resulting DataFrame and commits the job.
   *
   * @param sysArgs raw job arguments; the `JOB_NAME` option is resolved from them
   */
  def main(sysArgs: Array[String]): Unit = {
    val sc: SparkContext = new SparkContext()
    val glueContext: GlueContext = new GlueContext(sc)
    val spark: SparkSession = glueContext.getSparkSession

    val args = GlueArgParser.getResolvedOptions(sysArgs, Array("JOB_NAME"))
    Job.init(args("JOB_NAME"), glueContext, args.asJava)

    // Template URL: replace {host}, {port} and {schema} with the target
    // ClickHouse endpoint and database before running the job.
    val jdbcUrl = "jdbc:ch://{host}:{port}/{schema}"

    // Connection settings handed to Spark's JDBC data source.
    // NOTE(review): the password below is a placeholder; avoid committing real
    // credentials in the script and supply them from a secure source instead.
    val jdbcProperties = new java.util.Properties()
    jdbcProperties.setProperty("user", "default")
    jdbcProperties.setProperty("password", "*******")
    jdbcProperties.setProperty("driver", "com.clickhouse.jdbc.ClickHouseDriver")

    // Load the ClickHouse table as a DataFrame.
    val df: DataFrame = spark.read.jdbc(jdbcUrl, "my_table", jdbcProperties)

    // Print a preview of the rows to the job output.
    df.show()

    Job.commit()
  }
}



For more details, please visit our Spark & JDBC documentation.