From 1613dd35f798a864997fbe785d599f719c2940bd Mon Sep 17 00:00:00 2001 From: Chandra Devarakonda Date: Fri, 30 Jun 2023 19:49:23 +0000 Subject: [PATCH 1/3] Schedule 2.12.1 tests --- tests/tensorflow/r2.12/base.libsonnet | 6 +++--- tests/tensorflow/r2.12/common.libsonnet | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/tensorflow/r2.12/base.libsonnet b/tests/tensorflow/r2.12/base.libsonnet index 3b6bdaca2..85ae8797b 100644 --- a/tests/tensorflow/r2.12/base.libsonnet +++ b/tests/tensorflow/r2.12/base.libsonnet @@ -106,11 +106,11 @@ local volumes = import 'templates/volumes.libsonnet'; accelerator_type: %(acceleratorName)s, runtime_version: %(softwareVersion)s, network_config: {enable_external_ips: true}, - boot_disk: {source_image: 'projects/cloud-tpu-v2-images-dev/global/images/tpu-vm-tf-2-12-0-20230308'}, + boot_disk: {source_image: 'projects/cloud-tpu-v2-images-dev/global/images/family/tpu-vm-tf-2-12-1'}, metadata: { 'ssh-keys': 'xl-ml-test:$(cat /scripts/id_rsa.pub)', 'startup-script': %(startupScript)s, - 'tensorflow-docker-url': 'gcr.io/cloud-tpu-v2-images-dev/grpc_tpu_worker:tf-2.12.0' + 'tensorflow-docker-url': 'gcr.io/cloud-tpu-v2-images-dev/grpc_tpu_worker:tf-2.12.1' } }" https://tpu.googleapis.com/v2alpha1/projects/${project}/locations/${zone}/nodes?node_id=${tpu_name} @@ -139,7 +139,7 @@ local volumes = import 'templates/volumes.libsonnet'; if [[ ${softwareVersion: -3} == "pod" ]]; then yes '' | gcloud compute config-ssh sleep %(sleepTime)d - gcloud alpha compute tpus tpu-vm ssh ${tpu_name} --zone=${zone} --project=${project} --internal-ip --worker=all --command "sudo sed -i 's/TF_DOCKER_URL=.*/TF_DOCKER_URL=gcr.io\/cloud-tpu-v2-images-dev\/grpc_tpu_worker:tf-2.12.0\"/' /etc/systemd/system/tpu-runtime.service" + gcloud alpha compute tpus tpu-vm ssh ${tpu_name} --zone=${zone} --project=${project} --internal-ip --worker=all --command "sudo sed -i 's/TF_DOCKER_URL=.*/TF_DOCKER_URL=gcr.io\/cloud-tpu-v2-images-dev\/grpc_tpu_worker:tf-2.12.1\"/' /etc/systemd/system/tpu-runtime.service" gcloud alpha compute tpus tpu-vm ssh ${tpu_name} --zone=${zone} --project=${project} --internal-ip --worker=all --command "sudo systemctl daemon-reload && sudo systemctl restart tpu-runtime" fi sleep %(sleepTime)d diff --git a/tests/tensorflow/r2.12/common.libsonnet b/tests/tensorflow/r2.12/common.libsonnet index 7aac91abb..abca91ed3 100644 --- a/tests/tensorflow/r2.12/common.libsonnet +++ b/tests/tensorflow/r2.12/common.libsonnet @@ -21,11 +21,11 @@ local mixins = import 'templates/mixins.libsonnet'; ModelGardenTest:: common.ModelGardenTest { local config = self, - frameworkPrefix: 'tf.r2.12.0', + frameworkPrefix: 'tf.r2.12.1', tpuSettings+: { - softwareVersion: '2.12.0', + softwareVersion: '2.12.1', }, - imageTag: 'r2.12.0', + imageTag: 'r2.12.1', podTemplate+:: if config.accelerator.type == 'tpu' then { spec+: { @@ -72,7 +72,7 @@ local mixins = import 'templates/mixins.libsonnet'; print(str(tf.__file__)) ctc = cloud_tpu_client.Client(tpu=os.path.basename('$(TPU_NAME)'), zone=os.path.dirname('$(TPU_NAME)')) ctc.wait_for_healthy() - ctc.configure_tpu_version('2.12.0', restart_type='always') + ctc.configure_tpu_version('2.12.1', restart_type='always') ctc.wait_for_healthy() _VERSION_SWITCHER_ENDPOINT = 'http://{}:8475/requestversion' url = _VERSION_SWITCHER_ENDPOINT.format(ctc.network_endpoints()[0]['ipAddress']) @@ -209,7 +209,7 @@ local mixins = import 'templates/mixins.libsonnet'; }, }, }, - local functional_schedule = null, + local functional_schedule = '0 5 * * *', Functional:: mixins.Functional { schedule: functional_schedule, metricConfig+: { @@ -236,7 +236,7 @@ local mixins = import 'templates/mixins.libsonnet'; schedule: functional_schedule, }, Convergence:: mixins.Convergence { - schedule: null, + schedule: '0 11 * * *', metricConfig+: { sourceMap+:: { tensorboard+: { From 564f3ec4793a1b0b76c590db83a8f23e1f82d729 Mon Sep 17 00:00:00 2001 From: Chandra Devarakonda Date: Fri, 30 Jun 2023 20:14:38 +0000 Subject: [PATCH 2/3] Make conv test run every alternate day --- tests/tensorflow/r2.12/common.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensorflow/r2.12/common.libsonnet b/tests/tensorflow/r2.12/common.libsonnet index abca91ed3..4a1c8d230 100644 --- a/tests/tensorflow/r2.12/common.libsonnet +++ b/tests/tensorflow/r2.12/common.libsonnet @@ -236,7 +236,7 @@ local mixins = import 'templates/mixins.libsonnet'; schedule: functional_schedule, }, Convergence:: mixins.Convergence { - schedule: '0 11 * * *', + schedule: '0 11 * * 0,2,4', metricConfig+: { sourceMap+:: { tensorboard+: { From 2e6f2531bd887c86596f37bdb41c71e3c048217d Mon Sep 17 00:00:00 2001 From: Chandra Devarakonda Date: Thu, 6 Jul 2023 22:43:25 +0000 Subject: [PATCH 3/3] Adding 2.12.1 to the dashboard --- dashboard/app.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/app.yaml b/dashboard/app.yaml index e56e106af..6b232e8e3 100644 --- a/dashboard/app.yaml +++ b/dashboard/app.yaml @@ -33,7 +33,7 @@ resources: env_variables: REDISHOST: '10.25.27.107' REDISPORT: '6379' - TEST_NAME_PREFIXES: 'pt-nightly,pt-2.0,tf.nightly,tf.nightly-se,tf.exp,tf-r2.13.0,%-1vm,jax,flax,pax,mp-jax,mp-pax,mp-pt' + TEST_NAME_PREFIXES: 'pt-nightly,pt-2.0,tf.nightly,tf.nightly-se,tf.exp,tf.r2.12.1,tf-r2.13.0,%-1vm,jax,flax,pax,mp-jax,mp-pax,mp-pt' JOB_HISTORY_TABLE_NAME: 'xl-ml-test.metrics_handler_dataset.job_history' METRIC_HISTORY_TABLE_NAME: 'xl-ml-test.metrics_handler_dataset.metric_history'