The SparkSession is built directly from the notebook, with the in-cluster Kubernetes API server as the master and LocalStack as the S3 endpoint:
from pyspark import SparkConf
from pyspark.sql import SparkSession

# One small executor, Kubernetes authentication, and S3A access to LocalStack
config = {
    "spark.executor.instances": "1",
    "spark.executor.memory": "1g",
    "spark.executor.cores": "1",
    # "spark.driver.blockManager.port": "7777",
    # "spark.driver.port": "2222",
    # "spark.driver.host": "jupyter.spark.svc.cluster.local",
    # "spark.driver.bindAddress": "0.0.0.0",
    "spark.kubernetes.namespace": "default",
    "spark.kubernetes.authenticate.caCertFile": "./ca.crt",
    "spark.kubernetes.authenticate.oauthTokenFile": "./token",
    "spark.kubernetes.container.image": "newfrontdocker/spark:v3.0.1-j14",
    "spark.hadoop.fs.s3a.access.key": "test",
    "spark.hadoop.fs.s3a.secret.key": "test",
    "spark.hadoop.fs.s3a.endpoint": "localstack.kube-system.svc.cluster.local:4566",
    "spark.hadoop.fs.s3a.connection.ssl.enabled": "false",
    "spark.hadoop.fs.s3a.path.style.access": "true",
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    "spark.hadoop.com.amazonaws.services.s3.enableV4": "true",
}

def get_spark_session(app_name: str, config: dict, enable_hive=True):
    conf = SparkConf()
    # The driver talks to the Kubernetes API server from inside the cluster
    conf.setMaster("k8s://https://kubernetes.default.svc.cluster.local")
    for key, value in config.items():
        conf.set(key, value)
    # Prepare the Spark session to be returned
    spark = SparkSession.builder
    spark = spark.appName(app_name)
    spark = spark.enableHiveSupport() if enable_hive else spark
    return spark.config(conf=conf).getOrCreate()

spark = get_spark_session("getting_started", config)
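
With the session up, a quick way to confirm that executors start and that the S3A settings really reach LocalStack is to round-trip a tiny DataFrame. This is only a sketch: the bucket name demo-bucket is an assumption and has to exist in LocalStack beforehand (for example, created with awslocal s3 mb s3://demo-bucket).

# Smoke test: assumes a bucket named "demo-bucket" already exists in LocalStack
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "value"])
df.write.mode("overwrite").parquet("s3a://demo-bucket/smoke_test")
spark.read.parquet("s3a://demo-bucket/smoke_test").show()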
The logs can't be reached with kubectl logs.
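
If it is the executor output you are after, a rough sketch (an assumption about this setup, not something shown above) is to locate the executor pods by the spark-role=executor label that Spark on Kubernetes applies to them and tail them while they are still running; the namespace matches the config above, and the pod name is a placeholder.

# List the Spark executor pods in the namespace used in the config
kubectl get pods -n default -l spark-role=executor

# Follow the output of one executor while it is alive (placeholder pod name)
kubectl logs -n default -f <executor-pod-name>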