diff --git a/Dockerfile b/Dockerfile index 03a0c0c6c..af4b5118d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,19 @@ #FROM docker.io/bitnami/spark:3.1.2 -FROM apache/spark:3.5.0-python3 +FROM apache/spark:3.5.5-java17-python3 USER 0 +# Install with apt-get (stable CLI) and drop the package lists in the same layer. RUN apt-get update && \ -apt install -y curl vim +apt-get install -y --no-install-recommends ca-certificates curl vim && \ +rm -rf /var/lib/apt/lists/* -ENV SPARK_MASTER local[*] -ENV ZINGG_HOME /zingg-0.6.0 -ENV PATH $ZINGG_HOME/scripts:$PATH -ENV LANG C.UTF-8 +ENV SPARK_MASTER=local[*] +ENV ZINGG_HOME=/zingg-0.6.0 +ENV PATH=$ZINGG_HOME/scripts:$PATH +ENV LANG=C.UTF-8 WORKDIR / USER root WORKDIR /zingg-0.6.0 RUN curl --location https://github.com/zinggAI/zingg/releases/download/v0.6.0/zingg-0.6.0-spark-3.5.5.tar.gz | \ -tar --extract --gzip --strip=1 +tar --extract --gzip --strip=1 -RUN pip install -r python/requirements.txt -RUN pip install zingg +RUN pip install --no-cache-dir -r python/requirements.txt +RUN pip install --no-cache-dir zingg RUN chmod -R a+rwx /zingg-0.6.0/models diff --git a/pom.xml b/pom.xml index eaa243ee4..00ea251d3 100644 --- a/pom.xml +++ b/pom.xml @@ -39,7 +39,7 @@ 3.4.0 - 2.12.17 + 2.12.19 3.4 2.12 0.10.0 @@ -56,7 +56,7 @@ 3.5.5 - 2.12.10 + 2.12.19 3.5 2.12 0.10.0 @@ -67,13 +67,13 @@ 0.6.0 false false - 11 - 11 + 17 + 17 UTF-8 2.10 2.5.2 2.9.1 - 3.8.1 + 3.15.0 2.3.2 2.3 2.4.3 @@ -89,7 +89,7 @@ SparkPackagesRepo https://repos.spark-packages.org/ - + @@ -113,7 +113,7 @@ commons-beanutils - commons-beanutils + commons-beanutils @@ -146,27 +146,27 @@ commons-logging 1.1.1 - - com.fasterxml.jackson.core - jackson-core - 2.15.2 - - - com.fasterxml.jackson.core - jackson-annotations - 2.15.2 - - - com.fasterxml.jackson.core - jackson-databind - 2.15.2 - - - com.fasterxml.jackson.module - jackson-module-scala_2.12 - 2.15.2 - - + + com.fasterxml.jackson.core + jackson-core + 2.15.2 + + + com.fasterxml.jackson.core + jackson-annotations + 2.15.2 + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + + + com.fasterxml.jackson.module + jackson-module-scala_2.12 + 2.15.2 + + @@ -189,6 +189,10 @@ ${maven.compiler.source} ${maven.compiler.target} true + + -Xlint:deprecation + -Xlint:unchecked + @@ -216,70 
+220,85 @@ true classes - 4 - - - + 4 + + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED + + + + + org.jacoco + jacoco-maven-plugin + 0.8.12 + + + prepare-agent + + prepare-agent + + + + report + prepare-package + + report + + + + - org.jacoco - jacoco-maven-plugin - 0.8.6 - - - prepare-agent - - prepare-agent - - - - report - prepare-package - - report - - - + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + false + + + install-external-ss + validate + + ${project.basedir}/thirdParty/lib/secondstring.jar + default + com.wcohen.ss + secondstring + 2021 + jar + true + + + install-file + + + + install-external-py4j + validate + + ${project.basedir}/thirdParty/lib/py4j0.10.9.jar + default + py4j + py4j + 0.10.9 + jar + true + + + install-file + + + - - org.apache.maven.plugins - maven-install-plugin - 2.5.2 - false - - - install-external-ss - validate - - ${project.basedir}/thirdParty/lib/secondstring.jar - default - com.wcohen.ss - secondstring - 2021 - jar - true - - - install-file - - - - install-external-py4j - validate - - ${project.basedir}/thirdParty/lib/py4j0.10.9.jar - default - py4j - py4j - 0.10.9 - jar - true - - - install-file - - - - - + diff --git a/python/requirements.txt b/python/requirements.txt index 281558fd4..80fdba53a 100644 --- a/python/requirements.txt +++ 
b/python/requirements.txt @@ -1,8 +1,7 @@ -pandas -seaborn -matplotlib -sphinx -sphinx_rtd_theme -pyspark -numpy - +pyspark==3.5.5 +numpy==1.24.4 +pandas==2.0.3 +matplotlib==3.7.5 +seaborn==0.13.2 +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 \ No newline at end of file diff --git a/scripts/zingg.sh b/scripts/zingg.sh index d5b362257..2ccd89c5a 100755 --- a/scripts/zingg.sh +++ b/scripts/zingg.sh @@ -64,5 +64,21 @@ if [[ $RUN_PYTHON_DB_CONNECT_PHASE -eq 1 ]]; then python $EXECUTABLE else # All the additional options must be added here - $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER $PROPERTIES --files "./log4j2.properties" --conf spark.executor.extraJavaOptions="$log4j_setting -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -Xloggc:/tmp/memLog.txt -XX:+UseCompressedOops" --conf spark.driver.extraJavaOptions="$log4j_setting" $LOGGING --driver-class-path $ZINGG_JARS $EXECUTABLE $@ --email $EMAIL --license $LICENSE + GC_LOG_DIR="${GC_LOG_DIR:-/tmp}" + GC_LOG_OPTS="-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=${GC_LOG_DIR}/heapdump-%p.hprof -Xlog:gc*:file=${GC_LOG_DIR}/memLog.txt:time,uptime,level,tags" + export SPARK_SUBMIT_OPTS="${SPARK_SUBMIT_OPTS:-} $log4j_setting $GC_LOG_OPTS" + export SPARK_DRIVER_MEMORY="${SPARK_DRIVER_MEMORY:-4g}" + # SPARK_JAVA_OPTS is optional: when unset, fall back to the log4j and GC settings built above so the JVM options are never empty. + $SPARK_HOME/bin/spark-submit \ + --master "$SPARK_MASTER" \ + $PROPERTIES \ + --files "./log4j2.properties" \ + --driver-java-options "${SPARK_JAVA_OPTS:-$log4j_setting $GC_LOG_OPTS}" \ + --conf "spark.executor.extraJavaOptions=${SPARK_JAVA_OPTS:-$log4j_setting $GC_LOG_OPTS}" \ + $LOGGING \ + --driver-class-path "$ZINGG_JARS" \ + "$EXECUTABLE" \ + "$@" \ + --email "$EMAIL" \ + --license "$LICENSE" fi