
Commit

Merge branch 'develop' into mtalbott-sam-disks
LizBaldo authored Feb 12, 2025
2 parents 814a783 + 1a00c81 commit cd38bf5
Showing 18 changed files with 1,106 additions and 203 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit_test.yml
@@ -21,7 +21,7 @@ jobs:

services:
mysql:
image: mysql:8.0.40
image: mysql:8.4
env:
MYSQL_ROOT_PASSWORD: leonardo-test
MYSQL_USER: leonardo-test
4 changes: 2 additions & 2 deletions Dockerfile
@@ -2,7 +2,7 @@
# 1. Build the Helm client Go lib
# 2. Deploy Leonardo pointing to the Go lib

FROM golang:1.20 AS helm-go-lib-builder
FROM golang:1.23 AS helm-go-lib-builder

# TODO Consider moving repo set-up to the build script to make CI versioning easier
RUN mkdir /helm-go-lib-build && \
@@ -42,7 +42,7 @@ COPY --from=helm-go-lib-builder /helm-go-lib-build/helm-scala-sdk/helm-go-lib /l
# Install the Helm3 CLI client using a provided script because installing it via the RHEL package manager didn't work
RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && \
chmod 700 get_helm.sh && \
./get_helm.sh --version v3.11.2 && \
./get_helm.sh --version v3.15.3 && \
rm get_helm.sh

# Add the repos containing nginx, galaxy, setup apps, custom apps, cromwell and aou charts
4 changes: 2 additions & 2 deletions docker/run-mysql.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

# The CloudSQL console simply states "MySQL 8.0" so we may not match the minor version number
MYSQL_VERSION=8.0.40
# The CloudSQL console simply states "MySQL 8.4" so we may not match the minor version number
MYSQL_VERSION=8.4
start() {

echo "attempting to remove old $CONTAINER container..."
3 changes: 3 additions & 0 deletions http/src/main/resources/leo.conf
@@ -24,6 +24,9 @@ gce {
}

gke {
cluster {
version = ${?KUBERNETES_VERSION}
}
galaxyApp {
postgres.password = ${?GALAXY_POSTGRES_PASSWORD}
orchUrl = ${?ORCH_URL}
@@ -1,6 +1,12 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<databaseChangeLog logicalFilePath="leonardo" xmlns="http://www.liquibase.org/xml/ns/dbchangelog" xmlns:ext="http://www.liquibase.org/xml/ns/dbchangelog-ext" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog-ext http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-ext.xsd http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.4.xsd">
<changeSet logicalFilePath="leonardo" author="thibault" id="label">
<validCheckSum>8:1d8581dc0977ea88b1f006f6bc00f5b9</validCheckSum>
<comment>
MySQL 8.4+ no longer allows partial keys to be referenced by a foreign key; see https://bugs.mysql.com/bug.php?id=114838.
This changeSet has been modified to reflect that; the validCheckSum is the checksum from when the flag did not need to be set because restrict_fk_on_non_standard_key defaulted to OFF.
</comment>
<sql> SET restrict_fk_on_non_standard_key = OFF; </sql>
<createTable tableName="LABEL">
<column name="clusterId" type="BIGINT">
<constraints nullable="false"/>
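For context, a minimal sketch of the MySQL 8.4 behavior the comment above describes, assuming a local MySQL 8.4 server and the mysql-connector-j driver on the classpath; the database name, credentials, and table names are illustrative only, not part of the Leonardo schema.

import java.sql.DriverManager

object Mysql84FkDemo extends App {
  val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/leotestdb", "leonardo-test", "leonardo-test")
  val stmt = conn.createStatement()
  // Parent table with a composite primary key on (a, b).
  stmt.execute("CREATE TABLE PARENT (a BIGINT NOT NULL, b BIGINT NOT NULL, PRIMARY KEY (a, b))")
  // Referencing only a prefix of that key ("partial key") fails on MySQL 8.4+ while
  // restrict_fk_on_non_standard_key is ON (the new default), so the migration relaxes
  // the check for its own session first, exactly as the changeSet's SQL does:
  stmt.execute("SET restrict_fk_on_non_standard_key = OFF")
  stmt.execute("CREATE TABLE CHILD (a BIGINT NOT NULL, FOREIGN KEY (a) REFERENCES PARENT (a))")
  conn.close()
}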
4 changes: 2 additions & 2 deletions http/src/main/resources/reference.conf
@@ -70,7 +70,7 @@ dataproc {
}

# Cached dataproc image used by Terra
customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2024-12-16-17-22-28"
customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-01-21-15-07-39"

# The ratio of memory allocated to spark. 0.8 = 80%.
# Hail/Spark users generally allocate 80% of the ram to the JVM.
@@ -723,7 +723,7 @@ gke {
"69.173.112.0/21"
]
# See https://cloud.google.com/kubernetes-engine/docs/release-notes
version = "1.28"
version = "1.30"
nodepoolLockCacheExpiryTime = 1 hour
nodepoolLockCacheMaxSize = 200

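To clarify how the leo.conf and reference.conf changes interact: leo.conf only contributes a value for the GKE cluster version when the KUBERNETES_VERSION environment variable is set; otherwise the reference.conf default of "1.30" applies. A rough sketch of the standard Typesafe Config resolution this relies on (the actual config wiring and key path in Leonardo may differ):

import com.typesafe.config.ConfigFactory
import java.io.File

// leo.conf layered over reference.conf; ${?KUBERNETES_VERSION} only sets the key when the
// environment variable is present, so reference.conf's "1.30" remains the fallback.
val config = ConfigFactory
  .parseFile(new File("http/src/main/resources/leo.conf"))
  .withFallback(ConfigFactory.parseResources("reference.conf"))
  .resolve()

val gkeClusterVersion = config.getString("gke.cluster.version")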
@@ -6,7 +6,11 @@ import cats.syntax.all._
import org.broadinstitute.dsde.workbench.azure.AzureCloudContext
import org.broadinstitute.dsde.workbench.google2.OperationName
import org.broadinstitute.dsde.workbench.leonardo.{LabelMap, Runtime}
import org.broadinstitute.dsde.workbench.leonardo.SamResourceId.RuntimeSamResourceId
import org.broadinstitute.dsde.workbench.leonardo.SamResourceId.{
ProjectSamResourceId,
RuntimeSamResourceId,
WorkspaceResourceSamResourceId
}
import org.broadinstitute.dsde.workbench.leonardo.config.Config
import org.broadinstitute.dsde.workbench.leonardo.db.LeoProfile.api._
import org.broadinstitute.dsde.workbench.leonardo.db.LeoProfile.dummyDate
@@ -24,6 +28,7 @@ import org.broadinstitute.dsde.workbench.leonardo.model.{
import org.broadinstitute.dsde.workbench.model.google.{GcsBucketName, GoogleProject}
import org.broadinstitute.dsde.workbench.model.{IP, WorkbenchEmail}

import java.util.UUID
import java.sql.SQLDataException
import java.time.Instant
import scala.concurrent.ExecutionContext
@@ -65,7 +70,6 @@ object RuntimeServiceDbQueries {
Option[WorkspaceId],
Option[(String, String)]
)

private object ListRuntimesRecord {
def apply(product: ListRuntimesProduct): ListRuntimesRecord = product match {
case (l,
@@ -303,26 +307,33 @@

/**
* List runtimes filtered by the given terms. Only return authorized resources (per reader*Ids and/or owner*Ids).
*
* @param ownerGoogleProjectIds Sam resource IDs of Google projects the caller owns; grants access to every runtime in those projects
* @param ownerWorkspaceIds Sam resource IDs of workspaces the caller owns; grants access to every runtime in those workspaces
* @param readerGoogleProjectIds Sam resource IDs of Google projects the caller can read
* @param readerRuntimeIds Sam resource IDs of runtimes the caller can read (combined with readable projects/workspaces)
* @param readerWorkspaceIds Sam resource IDs of workspaces the caller can read
* @param cloudContext restrict results to a single cloud context
* @param cloudProvider restrict results to a single cloud provider (GCP or Azure)
* @param creatorEmail restrict results to runtimes created by this user
* @param excludeStatuses exclude runtimes in any of these statuses
* @param labelMap restrict results to runtimes carrying all of these labels
* @param workspaceId restrict results to a single workspace
* @return matching runtimes as ListRuntimeResponse2 records
*/
def listRuntimes(runtimeIds: Set[SamResourceId] = Set.empty,
cloudContext: Option[CloudContext] = None,
cloudProvider: Option[CloudProvider] = None,
creatorEmail: Option[WorkbenchEmail] = None,
excludeStatuses: List[RuntimeStatus] = List.empty,
labelMap: LabelMap = Map.empty[String, String],
workspaceId: Option[WorkspaceId] = None
def listRuntimes(
// Authorizations
ownerGoogleProjectIds: Set[ProjectSamResourceId] = Set.empty,
ownerWorkspaceIds: Set[WorkspaceResourceSamResourceId] = Set.empty,
readerGoogleProjectIds: Set[ProjectSamResourceId] = Set.empty,
readerRuntimeIds: Set[SamResourceId] = Set.empty,
readerWorkspaceIds: Set[WorkspaceResourceSamResourceId] = Set.empty,

// Filters
cloudContext: Option[CloudContext] = None,
cloudProvider: Option[CloudProvider] = None,
creatorEmail: Option[WorkbenchEmail] = None,
excludeStatuses: List[RuntimeStatus] = List.empty,
labelMap: LabelMap = Map.empty[String, String],
workspaceId: Option[WorkspaceId] = None
)(implicit ec: ExecutionContext): DBIO[Vector[ListRuntimeResponse2]] = {
// Normalize filter params
val provider = if (cloudProvider.isEmpty) {
@@ -332,8 +343,93 @@
}
} else cloudProvider

val runtimes = clusterQuery
.filter(_.internalId inSetBind runtimeIds.map(_.asString))
// Optimize Google project list if filtering to a specific cloud provider or context
val ownedProjects: Set[CloudContextDb] = ((provider, cloudContext) match {
case (Some(CloudProvider.Azure), _) => Set.empty[CloudContextDb]
case (Some(CloudProvider.Gcp), Some(CloudContext.Gcp(value))) =>
ownerGoogleProjectIds.filter(samId => samId.googleProject == value)
case _ => ownerGoogleProjectIds
}).map { case samId: SamResourceId =>
CloudContextDb(samId.resourceId)
}
val readProjects: Set[CloudContextDb] = ((provider, cloudContext) match {
case (Some(CloudProvider.Azure), _) => Set.empty[CloudContextDb]
case (Some(CloudProvider.Gcp), Some(CloudContext.Gcp(value))) =>
readerGoogleProjectIds.filter(samId => samId.googleProject == value)
case _ => readerGoogleProjectIds
}).map { case samId: SamResourceId =>
CloudContextDb(samId.resourceId)
}

// Optimize workspace list if filtering to a single workspace
val ownedWorkspaces: Set[WorkspaceId] = (workspaceId match {
case Some(wId) => ownerWorkspaceIds.filter(samId => WorkspaceId(UUID.fromString(samId.resourceId)) == wId)
case None => ownerWorkspaceIds
}).map(samId => WorkspaceId(UUID.fromString(samId.resourceId)))
val readWorkspaces: Set[WorkspaceId] = (workspaceId match {
case Some(wId) => readerWorkspaceIds.filter(samId => WorkspaceId(UUID.fromString(samId.resourceId)) == wId)
case None => readerWorkspaceIds
}).map(samId => WorkspaceId(UUID.fromString(samId.resourceId)))

val readRuntimes: Set[String] = readerRuntimeIds.map(readId => readId.asString)

val runtimeInReadWorkspaces: Option[ClusterTable => Rep[Option[Boolean]]] =
if (readRuntimes.isEmpty || readWorkspaces.isEmpty)
None
else
Some(runtime =>
(runtime.internalId inSetBind readRuntimes) &&
(runtime.workspaceId inSetBind readWorkspaces)
)

val runtimeInReadProjects: Option[ClusterTable => Rep[Option[Boolean]]] =
if (readRuntimes.isEmpty || readProjects.isEmpty)
None
else
Some(runtime =>
(runtime.internalId inSetBind readRuntimes) &&
(runtime.cloudProvider.? === (CloudProvider.Gcp: CloudProvider)) &&
(runtime.cloudContextDb inSetBind readProjects)
)

val runtimeInOwnedWorkspaces: Option[ClusterTable => Rep[Option[Boolean]]] =
if (ownedWorkspaces.isEmpty)
None
else
Some(runtime => runtime.workspaceId inSetBind ownedWorkspaces)

val runtimeInOwnedProjects: Option[ClusterTable => Rep[Option[Boolean]]] =
if (ownedProjects.isEmpty)
None
else if (cloudContext.isDefined) {
// If cloudContext is defined, we're already applying the filter in runtimesFiltered below.
// No need to filter by the list of user owned projects anymore as long as the specified
// project is owned by the user.
if (ownedProjects.exists(x => x.value == cloudContext.get.asString))
Some(_ => Some(true))
else None
} else
Some(runtime =>
(runtime.cloudProvider.? === (CloudProvider.Gcp: CloudProvider)) &&
(runtime.cloudContextDb inSetBind ownedProjects)
)

val runtimesAuthorized =
clusterQuery.filter[Rep[Option[Boolean]]] { runtime: ClusterTable =>
Seq(
runtimeInReadWorkspaces,
runtimeInOwnedWorkspaces,
runtimeInReadProjects,
runtimeInOwnedProjects
)
.mapFilter(opt => opt)
.map(_(runtime))
.reduceLeftOption(_ || _)
.getOrElse(Some(false): Rep[Option[Boolean]])
}

val runtimesFiltered = runtimesAuthorized
// Filter by params
.filterOpt(workspaceId) { case (runtime, wId) =>
runtime.workspaceId === (Some(wId): Rep[Option[WorkspaceId]])
}
@@ -362,6 +458,9 @@
)
.length === labelMap.size
}

// Assemble response
val runtimesJoined = runtimesFiltered
.join(runtimeConfigs)
.on((runtime, runtimeConfig) => runtime.runtimeConfigId === runtimeConfig.id)
.map { case (runtime, runtimeConfig) =>
@@ -399,7 +498,7 @@
)
}

runtimes.result
runtimesJoined.result
.map { records: Seq[ListRuntimesProduct] =>
records
.map(record => ListRuntimesRecord(record))
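For orientation, a hypothetical call site showing how the new parameter groups are intended to be used. The values are placeholders; in Leonardo the authorization scopes are resolved from Sam by the service layer (see RuntimeServiceInterp below), and the surrounding imports from RuntimeServiceDbQueries.scala are assumed.

import scala.concurrent.ExecutionContext.Implicits.global // listRuntimes needs an implicit ExecutionContext

// Placeholder scopes; in practice these come from Sam for the calling user.
val ownerProjectIds: Set[ProjectSamResourceId] = Set.empty
val readerWorkspaceIds: Set[WorkspaceResourceSamResourceId] = Set.empty
val readerRuntimeIds: Set[SamResourceId] = Set.empty

// Authorization scopes and filters are now passed as separate groups of arguments.
val dbAction = RuntimeServiceDbQueries.listRuntimes(
  ownerGoogleProjectIds = ownerProjectIds,
  readerRuntimeIds = readerRuntimeIds,
  readerWorkspaceIds = readerWorkspaceIds,
  excludeStatuses = List(RuntimeStatus.Deleted),
  cloudProvider = Some(CloudProvider.Gcp),
  labelMap = Map("creator" -> "user@example.com")
)
// dbAction is a DBIO[Vector[ListRuntimeResponse2]]; run it transactionally, e.g. dbAction.transaction.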
@@ -98,6 +98,7 @@ class AppDependenciesBuilder(baselineDependenciesBuilder: BaselineDependenciesBu

val azureService = new RuntimeV2ServiceInterp[IO](
baselineDependencies.runtimeServicesConfig,
baselineDependencies.authProvider,
baselineDependencies.publisherQueue,
baselineDependencies.dateAccessedUpdaterQueue,
baselineDependencies.wsmClientProvider,
@@ -20,6 +20,7 @@ import org.broadinstitute.dsde.workbench.google2.{
MachineTypeName,
ZoneName
}
import org.broadinstitute.dsde.workbench.leonardo.JsonCodec._
import org.broadinstitute.dsde.workbench.leonardo.RuntimeImageType.{CryptoDetector, Jupyter, Proxy, Welder}
import org.broadinstitute.dsde.workbench.leonardo.SamResourceId.{
PersistentDiskSamResourceId,
@@ -34,6 +35,7 @@ import org.broadinstitute.dsde.workbench.leonardo.db._
import org.broadinstitute.dsde.workbench.leonardo.http.service.DiskServiceInterp.getDiskSamPolicyMap
import org.broadinstitute.dsde.workbench.leonardo.http.service.RuntimeServiceInterp._
import org.broadinstitute.dsde.workbench.leonardo.model.SamResource.RuntimeSamResource
import org.broadinstitute.dsde.workbench.leonardo.model.SamResourceAction._
import org.broadinstitute.dsde.workbench.leonardo.model._
import org.broadinstitute.dsde.workbench.leonardo.monitor.LeoPubsubMessage._
import org.broadinstitute.dsde.workbench.leonardo.monitor.{
@@ -247,19 +249,36 @@ class RuntimeServiceInterp[F[_]: Parallel](
for {
ctx <- as.ask

samResources <- samService.listResources(userInfo.accessToken.token, RuntimeSamResource.resourceType)
// throw 403 if user doesn't have project permission
hasProjectPermission <- cloudContext.traverse(cc =>
authProvider.isUserProjectReader(
cc,
userInfo
)
)
_ <- ctx.span.traverse(s => F.delay(s.addAnnotation("Done checking project permission with Sam")))

_ <- F.raiseWhen(!hasProjectPermission.getOrElse(true))(ForbiddenError(userInfo.userEmail, Some(ctx.traceId)))

(labelMap, includeDeleted, _) <- F.fromEither(processListParameters(params))
excludeStatuses = if (includeDeleted) List.empty else List(RuntimeStatus.Deleted)
creatorOnly <- F.fromEither(processCreatorOnlyParameter(userInfo.userEmail, params, ctx.traceId))

authorizedIds <- getAuthorizedIds(userInfo, creatorOnly)
_ <- ctx.span.traverse(s => F.delay(s.addAnnotation("Start DB query for listRuntimes")))
runtimes <- RuntimeServiceDbQueries
.listRuntimes(samResources.map(RuntimeSamResourceId).toSet,
excludeStatuses = excludeStatuses,
creatorEmail = creatorOnly,
cloudContext = cloudContext,
labelMap = labelMap
.listRuntimes(
// Authorization scopes
ownerGoogleProjectIds = authorizedIds.ownerGoogleProjectIds,
ownerWorkspaceIds = authorizedIds.ownerWorkspaceIds,
readerGoogleProjectIds = authorizedIds.readerGoogleProjectIds,
readerRuntimeIds = authorizedIds.readerRuntimeIds,
readerWorkspaceIds = authorizedIds.readerWorkspaceIds,
// Filters
excludeStatuses = excludeStatuses,
creatorEmail = creatorOnly,
cloudContext = cloudContext,
labelMap = labelMap
)
.transaction

@@ -830,6 +849,55 @@
userEmail
)
} yield runtime

private[service] def getAuthorizedIds(
userInfo: UserInfo,
creatorEmail: Option[WorkbenchEmail] = None
)(implicit ev: Ask[F, AppContext]): F[AuthorizedIds] = for {
// Authorize: user has an active account and has accepted terms of service
_ <- authProvider.checkUserEnabled(userInfo)

// Authorize: get resource IDs the user can see
// HACK: leonardo is modeling access control here, handling inheritance
// of workspace and project-level permissions. Sam and WSM already do this,
// and should be considered the point of truth.

// HACK: leonardo short-circuits access control to grant access to runtime creators.
// This supports the use case where `terra-ui` requests status of runtimes that have
// not yet been provisioned in Sam.
creatorRuntimeIdsBackdoor: Set[RuntimeSamResourceId] <- creatorEmail match {
case Some(email: WorkbenchEmail) =>
RuntimeServiceDbQueries
.listRuntimeIdsForCreator(email)
.map(_.map(_.samResource).toSet)
.transaction
case None => F.pure(Set.empty: Set[RuntimeSamResourceId])
}

// v1 runtimes (sam resource type `notebook-cluster`) are readable only
// by their creators (`Creator` is the SamResource.Runtime `ownerRoleName`),
// if the creator also has read access to the corresponding SamResource.Project
creatorV1RuntimeIds: Set[RuntimeSamResourceId] <- authProvider
.listResourceIds[RuntimeSamResourceId](hasOwnerRole = true, userInfo)
readerProjectIds: Set[ProjectSamResourceId] <- authProvider
.listResourceIds[ProjectSamResourceId](hasOwnerRole = false, userInfo)

// v1 runtimes are discoverable by owners on the corresponding Project
ownerProjectIds: Set[ProjectSamResourceId] <- authProvider
.listResourceIds[ProjectSamResourceId](hasOwnerRole = true, userInfo)

// combine: to read a runtime, user needs to be at least one of:
// - creator of a v1 runtime (Sam-authenticated)
// - any role on a v2 runtime (Sam-authenticated)
// - creator of a runtime (in Leo db) and filtering their request by creator-only
readerRuntimeIds: Set[SamResourceId] = creatorV1RuntimeIds ++ creatorRuntimeIdsBackdoor
} yield AuthorizedIds(
ownerGoogleProjectIds = ownerProjectIds,
ownerWorkspaceIds = Set.empty,
readerGoogleProjectIds = readerProjectIds,
readerRuntimeIds = readerRuntimeIds,
readerWorkspaceIds = Set.empty
)
}

object RuntimeServiceInterp {
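AuthorizedIds itself is not part of the visible diff; judging from how getAuthorizedIds constructs it and how listRuntimes consumes it, it is presumably a simple container for the Sam-derived scopes. A hypothetical sketch, inferred from the call sites above (the real definition may differ):

// Hypothetical shape only; field names and types mirror the arguments passed to
// RuntimeServiceDbQueries.listRuntimes and the yield in getAuthorizedIds.
final case class AuthorizedIds(
  ownerGoogleProjectIds: Set[ProjectSamResourceId],
  ownerWorkspaceIds: Set[WorkspaceResourceSamResourceId],
  readerGoogleProjectIds: Set[ProjectSamResourceId],
  readerRuntimeIds: Set[SamResourceId],
  readerWorkspaceIds: Set[WorkspaceResourceSamResourceId]
)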
(remaining changed files not rendered)
