Changes between Initial Version and Version 1 of cypress/GlobusInBarchJob


Ignore:
Timestamp:
04/03/25 16:38:41 (3 days ago)
Author:
fuji
Comment:

Legend:

Unmodified
Added
Removed
Modified
  • cypress/GlobusInBarchJob

    v1 v1  
     1= Example Pipeline for Data Transfer Using Globus and Computation in Batch Jobs =
     2Here we consider a pipeline to do the following processes.
     31. Transfer data files from Box to the Cypress Lustre directory.
     42. Perform computation using the data.
     53. Transfer the results to Box.
     64. Delete files in Cypress Lustre.
     7
     8== Scripts ==
     9=== Job Submission Script ===
     10'''submitJob.sh'''
     11{{{
     12#
     13# Pipeline for Data Transfer Using Globus and Computation
     14#
     15# Job name
     16JOB_NAME="COMPUTING1"
     17
     18# Set path
     19export BOX_DATA_DIR="/Test/"
     20export CYPRESS_WORK_DIR="/lustre/project/group/userid/test/"
     21export BOX_RESULT_DIR="/Test_result/"
     22#
     23# Submit a job to transfer data from Box to Cypress
     24JOB1=`sbatch --job-name=${JOB_NAME}_DL ./transferData.sh DOWNLOAD KEEP | awk '{print $4}'`;
     25echo $JOB1 "Submitted"
     26
     27# Submit a job to process data on Cypress
     28JOB2=`sbatch --job-name=${JOB_NAME} --dependency=afterok:$JOB1 ./computing.sh | awk '{print $4}'`;
     29echo $JOB2 "Submitted"
     30
     31# Submit a job to transfer data from Cypress to Box
     32JOB3=`sbatch --job-name=${JOB_NAME}_UL --dependency=afterok:$JOB2 ./transferData.sh UPLOAD DELETE | awk '{print $4}'`;
     33echo $JOB3 "Submitted"
     34}}}
     35'''JOB_NAME''' is the job name.
     36
     37'''BOX_DATA_DIR''' is the directory in Box where the source data is stored.
     38
     39'''CYPRESS_WORK_DIR''' is the directory where the downloaded data is stored.
     40
     41'''BOX_RESULT_DIR''' is the directory where results are uploaded in Box.
     42
     43=== Data Transfer Script ===
     44'''transferData.sh'''
     45{{{
     46#!/bin/bash
     47#SBATCH --partition=centos7
     48#SBATCH --qos=long
     49#SBATCH --time=7-00:00:00
     50#SBATCH --nodes=1
     51#SBATCH --ntasks-per-node=1
     52#SBATCH --cpus-per-task=1
     53
     54# Check options
     55if [ $# -ne 2 ]; then
     56    echo 'Usage: transferData.sh [DOWNLOAD | UPLOAD] [KEEP | DELETE]'
     57    exit 1
     58fi
     59
     60# Check path
     61if [[ -z "${BOX_DATA_DIR}" ]]; then
     62    echo "ERROR!  BOX_DATA_DIR isn't set."
     63    exit 1
     64fi
     65if [[ -z "${CYPRESS_WORK_DIR}" ]]; then
     66    echo "ERROR!  CYPRESS_WORK_DIR isn't set."
     67    exit 1
     68fi
     69if [[ -z "${BOX_RESULT_DIR}" ]]; then
     70    echo "ERROR!  BOX_RESULT_DIR isn't set."
     71    exit 1
     72fi
     73
     74# Start Globus Connect
     75module load globusconnectpersonal/3.2.5
     76globusconnect -start &
     77
     78# Set up CLI environment
     79source activate globus-cli
     80
     81# Obtain local UUID
     82MY_UUID=$(globus endpoint local-id)
     83uuid_code=$?
     84if [ $uuid_code -ne 0 ]; then
     85    echo "ERROR!  Globus Connect isn't activated."
     86    globusconnect -stop
     87    exit 1
     88fi
     89
     90# Make the source and destination path
     91if [[ "$1" == "DOWNLOAD" ]]; then
     92    SOURCE_EP=$TULANE_BOX:$BOX_DATA_DIR
     93    DEST_EP=$MY_UUID:$CYPRESS_WORK_DIR
     94else
     95    SOURCE_EP=$MY_UUID:$CYPRESS_WORK_DIR
     96    DEST_EP=$TULANE_BOX:$BOX_DATA_DIR   
     97fi
     98
     99# Check logged in to Globus
     100output=$(globus whoami >/dev/null 2>&1)
     101output_code=$?
     102if [ $output_code -ne 0 ]; then
     103    echo "ERROR!  Not logged in to Globus"
     104    globusconnect -stop
     105    exit 1
     106fi
     107
     108task_id=$(globus transfer "$SOURCE_EP" "$DEST_EP" --label "$SLURM_JOB_NAME" | tail -1 | awk '{print $3}')
     109output_code=$?
     110if [ $output_code -ne 0 ]; then
     111    echo "ERROR!  The transfer of data in could not be started."
     112    globusconnect -stop
     113    exit 1
     114fi
     115
     116# wait util the task done.
     117output=$(globus task wait $task_id)
     118output_code=$?
     119if [ $output_code -ne 0 ]; then
     120    echo "ERROR!  The transfer of data was failed."
     121    globus task cancel $task_id
     122    globusconnect -stop
     123    exit 1
     124fi
     125
     126# Check if the delete option is set
     127if [[ "$2" == "DELETE" ]]; then
     128    task_id=$(globus rm --recursive $SOURCE_EP |& awk '{print $6}' | sed -e "s/\"//g")
     129    globus task wait $task_id
     130fi
     131
     132# done successfully
     133source deactivate globus-cli
     134globusconnect -stop
     135exit 0
     136}}}
     137
     138
     139=== Computing Script ===
     140'''computing.sh'''
     141{{{
     142#!/bin/bash
     143#SBATCH --partition=defq
     144#SBATCH --qos=normal
     145#SBATCH --time=1-00:00:00
     146#SBATCH --nodes=1
     147#SBATCH --ntasks-per-node=1
     148#SBATCH --cpus-per-task=20
     149
     150# cd to working directory
     151cd ${CYPRESS_WORK_DIR}
     152pwd
     153
     154# module load ... computing something
     155touch RES
     156sleep 5
     157
     158#done
     159exit 0
     160}}}
     161
     162
     163== How to submit a job ==
     164{{{
     165sh ./submitJob.sh
     166}}}