From 525c940f8dd3115bb496547be6f6dcd6f56b0bab Mon Sep 17 00:00:00 2001 From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:34:24 -0400 Subject: [PATCH] [IA-4967] Update COS image (#4731) Co-authored-by: LizBaldo Co-authored-by: LizBaldo --- .../main/resources/init-resources/gce-init.sh | 24 ++++++---- .../main/resources/init-resources/startup.sh | 44 ++++++++++++++----- http/src/main/resources/reference.conf | 2 +- jenkins/gce-custom-images/create_gce_image.sh | 5 ++- 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh index 70b115f00c3..ca2d913d110 100644 --- a/http/src/main/resources/init-resources/gce-init.sh +++ b/http/src/main/resources/init-resources/gce-init.sh @@ -218,11 +218,19 @@ mkdir -p ${DOCKER_COMPOSE_FILES_DIRECTORY} log 'Formatting and mounting persistent disk...' # Format and mount persistent disk -# Fix this to `sdb`. We've never seen a device name that's not `sdb`, -# Altho you some images, this cmd $(lsblk -o name,serial | grep 'user-disk' | awk '{print $1}') -# can be used to find device name, this doesn't work for COS images -USER_DISK_DEVICE_ID=$(lsblk -o name,serial | grep 'user-disk' | awk '{print $1}') -DISK_DEVICE_ID=${USER_DISK_DEVICE_ID:-sdb} +## The PD should be the only `sd` disk that is not mounted yet +AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd")) +FreesdDisks=() +for Disk in "${AllsdDisks[@]}"; do + Mounts="$(lsblk -no MOUNTPOINT "${Disk}")" + if [ -z "$Mounts" ]; then + echo "Found our unmounted persistent disk!" + FreesdDisks="${Disk}" + else + echo "Not our persistent disk!"
+ fi +done +DISK_DEVICE_ID=${FreesdDisks} ## Only format disk is it hasn't already been formatted if [ "$IS_GCE_FORMATTED" == "false" ] ; then @@ -232,10 +240,10 @@ if [ "$IS_GCE_FORMATTED" == "false" ] ; then # Passing -F -F to mkfs.ext4 should force the tool to ignore the state of the partition. # Note that there should be two instances command-line switch (-F -F) to override this check - mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard /dev/${DISK_DEVICE_ID} -F -F + mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard ${DISK_DEVICE_ID} -F -F fi -mount -t ext4 -O discard,defaults /dev/${DISK_DEVICE_ID} ${WORK_DIRECTORY} +mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY} # done persistent disk setup STEP_TIMINGS+=($(date +%s)) @@ -580,7 +588,7 @@ fi # If it's GCE, we resize the PD. Dataproc doesn't have PD if [ -f "/var/certs/jupyter-server.crt" ]; then echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..." - resize2fs /dev/${DISK_DEVICE_ID} + resize2fs ${DISK_DEVICE_ID} fi # Remove any unneeded cached images to save disk space. diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh index c978ef28d30..05dfe4cc53d 100644 --- a/http/src/main/resources/init-resources/startup.sh +++ b/http/src/main/resources/init-resources/startup.sh @@ -85,8 +85,19 @@ SERVER_CRT=$(proxyServerCrt) SERVER_KEY=$(proxyServerKey) ROOT_CA=$(rootCaPem) FILE=/var/certs/jupyter-server.crt -USER_DISK_DEVICE_ID=$(lsblk -o name,serial | grep 'user-disk' | awk '{print $1}') -DISK_DEVICE_ID=${USER_DISK_DEVICE_ID:-sdb} +## The PD should be the only `sd` disk that is not mounted yet +AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd")) +FreesdDisks=() +for Disk in "${AllsdDisks[@]}"; do + Mounts="$(lsblk -no MOUNTPOINT "${Disk}")" + if [ -z "$Mounts" ]; then + echo "Found our unmounted persistent disk!"
+ FreesdDisks="${Disk}" + else + echo "Not our persistent disk!" + fi +done +DISK_DEVICE_ID=${FreesdDisks} JUPYTER_HOME=/etc/jupyter RSTUDIO_SCRIPTS=/etc/rstudio/scripts @@ -119,9 +130,9 @@ then JUPYTER_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/jupyter-docker*) export WORK_DIRECTORY='/mnt/disks/work' - fsck.ext4 -tvy /dev/${DISK_DEVICE_ID} + fsck.ext4 -tvy ${DISK_DEVICE_ID} mkdir -p /mnt/disks/work - mount -t ext4 -O discard,defaults /dev/${DISK_DEVICE_ID} ${WORK_DIRECTORY} + mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY} chmod a+rwx /mnt/disks/work # (1/6/22) Restart Jupyter Container to reset `NOTEBOOKS_DIR` for existing runtimes. This code can probably be removed after a year @@ -147,14 +158,12 @@ END ${DOCKER_COMPOSE} -f ${JUPYTER_DOCKER_COMPOSE} stop ${DOCKER_COMPOSE} -f ${JUPYTER_DOCKER_COMPOSE} rm -f ${DOCKER_COMPOSE} --env-file=/var/variables.env -f ${JUPYTER_DOCKER_COMPOSE} up -d + + # the docker containers need to be restarted or the jupyter container + # will fail to start until the appropriate volume/device exists + docker restart $JUPYTER_SERVER_NAME + docker restart $WELDER_SERVER_NAME - if [ "${GPU_ENABLED}" == "true" ] ; then - # Containers will usually restart just fine. But when gpu is enabled, - # jupyter container will fail to start until the appropriate volume/device exists. - # Hence restart jupyter container here - docker restart jupyter-server - docker restart welder-server - fi # This line is only for migration (1/26/2022). Say you have an existing runtime where jupyter container's PD is mapped at $HOME/notebooks, # then all jupyter related files (.jupyter, .local) and things like bash history etc all lives under $HOME.
The home diretory change will # make it so that next time this runtime starts up, PD will be mapped to $HOME, but this means that the previous files under $HOME (.jupyter, .local etc) @@ -174,6 +183,16 @@ END docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ fi + if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then + echo "Restarting Rstudio Container $GOOGLE_PROJECT / $CLUSTER_NAME..." + + # the docker containers need to be restarted or the jupyter container + # will fail to start until the appropriate volume/device exists + docker restart $RSTUDIO_SERVER_NAME + docker restart $WELDER_SERVER_NAME + + fi + if [ "$UPDATE_WELDER" == "true" ] ; then echo "Upgrading welder..." @@ -251,6 +270,7 @@ else fi fi + function failScriptIfError() { if [ $EXIT_CODE -ne 0 ]; then echo "Fail to docker-compose start container ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}" @@ -396,5 +416,5 @@ fi # If it's GCE, we resize the PD. Dataproc doesn't have PD if [ -f "$FILE" ]; then echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..."
- resize2fs /dev/${DISK_DEVICE_ID} + resize2fs ${DISK_DEVICE_ID} fi diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 10054d8f25d..e4ba8d33d68 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -111,7 +111,7 @@ dataproc { } gce { - customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2024-08-13-15-17-24" + customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2024-08-14-15-03-19" userDiskDeviceName = "user-disk" defaultScopes = [ "https://www.googleapis.com/auth/userinfo.email", diff --git a/jenkins/gce-custom-images/create_gce_image.sh b/jenkins/gce-custom-images/create_gce_image.sh index 1a7af924c96..a0c20fcb3aa 100755 --- a/jenkins/gce-custom-images/create_gce_image.sh +++ b/jenkins/gce-custom-images/create_gce_image.sh @@ -39,12 +39,13 @@ fi # Set this to the tag of the Daisy image you had pulled DAISY_IMAGE_TAG="release" + # When updating, to find the resource path: # 1. run `gcloud compute images list --no-standard-images --project=cos-cloud | grep lts` to get the list of available container-optimized OS images # 2. select the image of interest, say, `cos-89-16108-403-22` # 3. run `gcloud compute images describe cos-89-16108-403-22 --project cos-cloud | grep selfLink` # 4. extract the segments starting with 'projects' -BASE_IMAGE="projects/cos-cloud/global/images/cos-101-17162-463-16" +BASE_IMAGE="projects/cos-cloud/global/images/cos-113-18244-85-64" if [[ "$VALIDATE_WORKFLOW" == "true" ]]; then DAISY_CONTAINER="gcr.io/compute-image-tools/daisy:${DAISY_IMAGE_TAG} -validate" @@ -72,4 +73,4 @@ gcloud compute images add-iam-policy-binding \ if ! [ -z "$OUTPUT_FILE_PATH" ]; then echo "projects/$GOOGLE_PROJECT/global/images/$OUTPUT_IMAGE_NAME" > $OUTPUT_FILE_PATH -fi \ No newline at end of file +fi