{ "cells": [ { "cell_type": "markdown", "id": "18bbba87-d3b8-4912-9457-d34d1561fbf6", "metadata": {}, "source": [ "## BioAnalyze HPC Helper Libraries\n", "\n", "BioAnalyze HPC comes with a set of libraries and defaults to help you get started quickly. One of these is modules, which is a way of separating out different software environments. There are also several CLI components." ] }, { "cell_type": "code", "execution_count": 1, "id": "0eeba3ca-c145-43d0-90fc-84b4525b33c5", "metadata": {}, "outputs": [], "source": [ "module load pcluster-helpers" ] }, { "cell_type": "code", "execution_count": 2, "id": "9cc1048d-6a68-4ccc-8238-09816fc5c0c2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m \u001b[0m\n", "\u001b[1m \u001b[0m\u001b[1;33mUsage: \u001b[0m\u001b[1mpcluster-helper [OPTIONS] COMMAND [ARGS]...\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", "\u001b[1m \u001b[0m\n", " Helper functions for aws parallelcluster. \n", " \n", "\u001b[2m╭─\u001b[0m\u001b[2m Options ───────────────────────────────────────────────────────────────────\u001b[0m\u001b[2m─╮\u001b[0m\n", "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-help\u001b[0m Show this message and exit. \u001b[2m│\u001b[0m\n", "\u001b[2m╰──────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[2m╭─\u001b[0m\u001b[2m Commands ──────────────────────────────────────────────────────────────────\u001b[0m\u001b[2m─╮\u001b[0m\n", "\u001b[2m│\u001b[0m \u001b[1;36mgen-nxf-slurm-config\u001b[0m\u001b[1;36m \u001b[0m Generate a slurm.config for nextflow that is \u001b[2m│\u001b[0m\n", "\u001b[2m│\u001b[0m \u001b[1;36m \u001b[0m compatible with your cluster. 
\u001b[2m│\u001b[0m\n", "\u001b[2m│\u001b[0m \u001b[1;36msinfo \u001b[0m\u001b[1;36m \u001b[0m A more helpful sinfo \u001b[2m│\u001b[0m\n", "\u001b[2m╰──────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\n" ] } ], "source": [ "pcluster-helper --help" ] }, { "cell_type": "markdown", "id": "2942525c-1fea-445f-9be1-9f28d4c93a7c", "metadata": {}, "source": [ "### SLURM - SInfo Helper\n", "\n", "When you submit jobs to an HPC cluster it is important to know how to map what your job needs to what the cluster provides. \n", "\n", "**Your cluster may look different depending on your queue configuration setup.** \n", "\n", "Please make sure to submit the jobs from your own sinfo table and not the one shown here." ] }, { "cell_type": "code", "execution_count": 3, "id": "7bbb7e23-312b-4485-9047-611cc3facfbb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Printing sinfo table\n", "\u001b[3m SLURM SInfo \u001b[0m\n", "┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mQueue\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Constraint\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mMem(Gib)\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mCPU\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m EC2\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mSBATCH \u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩\n", "│ \u001b[94mbasic\u001b[0m │ \u001b[94mt2micro\u001b[0m │ \u001b[94m1\u001b[0m │ \u001b[94m1\u001b[0m │ \u001b[94mt2.micro\u001b[0m │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--partition=basic\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--constraint=t2micro\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH -c=1\u001b[0m │\n", 
"├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[94mbasic\u001b[0m │ \u001b[94mt2medium\u001b[0m │ \u001b[94m4\u001b[0m │ \u001b[94m2\u001b[0m │ \u001b[94mt2.medium\u001b[0m │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--partition=basic\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--constraint=t2medium\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH -c=2\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[94mbasic\u001b[0m │ \u001b[94mt3alarge\u001b[0m │ \u001b[94m8\u001b[0m │ \u001b[94m1\u001b[0m │ \u001b[94mt3a.large\u001b[0m │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--partition=basic\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--constraint=t3alarge\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH -c=1\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[94mbasic\u001b[0m │ \u001b[94mt3axlarge\u001b[0m │ \u001b[94m16\u001b[0m │ \u001b[94m2\u001b[0m │ \u001b[94mt3a.xlarge\u001b[0m │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--partition=basic\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--constraint=t3axlarge\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH -c=2\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[94mbasic\u001b[0m │ \u001b[94mt32xlarge\u001b[0m │ \u001b[94m32\u001b[0m │ \u001b[94m4\u001b[0m │ \u001b[94mt3.2xlarge\u001b[0m │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--partition=basic\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m--constraint=t32xlarge\u001b[0m │\n", "│ │ │ │ │ │ \u001b[94m#SBATCH -c=4\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ 
\u001b[91mbig\u001b[0m │ \u001b[91mr6i4xlarge\u001b[0m │ \u001b[91m128\u001b[0m │ \u001b[91m8\u001b[0m │ \u001b[91mr6i.4xlarge\u001b[0m │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--partition=big\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--constraint=r6i4xlar…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH -c=8\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[91mbig\u001b[0m │ \u001b[91mm5a8xlarge\u001b[0m │ \u001b[91m128\u001b[0m │ \u001b[91m16\u001b[0m │ \u001b[91mm5a.8xlarge\u001b[0m │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--partition=big\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--constraint=m5a8xlar…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH -c=16\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[91mbig\u001b[0m │ \u001b[91mc6a16xlarge\u001b[0m │ \u001b[91m128\u001b[0m │ \u001b[91m32\u001b[0m │ \u001b[91mc6a.16xlarge\u001b[0m │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--partition=big\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--constraint=c6a16xla…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH -c=32\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[91mbig\u001b[0m │ \u001b[91mm5a16xlarge\u001b[0m │ \u001b[91m256\u001b[0m │ \u001b[91m32\u001b[0m │ \u001b[91mm5a.16xlarge\u001b[0m │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--partition=big\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m--constraint=m5a16xla…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[91m#SBATCH -c=32\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[36mgpu\u001b[0m │ \u001b[36mp32xlarge\u001b[0m │ \u001b[36m61\u001b[0m │ 
\u001b[36m4\u001b[0m │ \u001b[36mp3.2xlarge\u001b[0m │ \u001b[36m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m--partition=gpu\u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m--constraint=p32xlarge\u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m#SBATCH -c=4\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[36mgpu\u001b[0m │ \u001b[36mp38xlarge\u001b[0m │ \u001b[36m244\u001b[0m │ \u001b[36m16\u001b[0m │ \u001b[36mp3.8xlarge\u001b[0m │ \u001b[36m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m--partition=gpu\u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m--constraint=p38xlarge\u001b[0m │\n", "│ │ │ │ │ │ \u001b[36m#SBATCH -c=16\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[35mmem\u001b[0m │ \u001b[35mr6i2xlarge\u001b[0m │ \u001b[35m64\u001b[0m │ \u001b[35m4\u001b[0m │ \u001b[35mr6i.2xlarge\u001b[0m │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m--partition=mem\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m--constraint=r6i2xlar…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH -c=4\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[35mmem\u001b[0m │ \u001b[35mm5a4xlarge\u001b[0m │ \u001b[35m64\u001b[0m │ \u001b[35m8\u001b[0m │ \u001b[35mm5a.4xlarge\u001b[0m │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m--partition=mem\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m--constraint=m5a4xlar…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH -c=8\u001b[0m │\n", "├───────┼─────────────┼──────────┼─────┼──────────────┼────────────────────────┤\n", "│ \u001b[35mmem\u001b[0m │ \u001b[35mc6a8xlarge\u001b[0m │ \u001b[35m64\u001b[0m │ \u001b[35m16\u001b[0m │ \u001b[35mc6a.8xlarge\u001b[0m │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ 
\u001b[35m--partition=mem\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH \u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m--constraint=c6a8xlar…\u001b[0m │\n", "│ │ │ │ │ │ \u001b[35m#SBATCH -c=16\u001b[0m │\n", "└───────┴─────────────┴──────────┴─────┴──────────────┴────────────────────────┘\n" ] } ], "source": [ "pcluster-helper sinfo" ] }, { "cell_type": "markdown", "id": "9e499f88-6e7d-4942-b1b3-0919c3e40c5c", "metadata": {}, "source": [ "### Submit a sample job\n", "\n", "Grab the text below and create a file called `my-job.sh`.\n", "\n", "```bash\n", "#!/bin/bash\n", "\n", "# my-job.sh\n", "\n", "#SBATCH --job-name=my-job-name # Job name\n", "#SBATCH --partition=MY_PARTITION # sbatch job partition from the table above\n", "#SBATCH -c=CPU # sbatch cpu from the above table\n", "#SBATCH --constraint=MY_CONSTRAINT # sbatch constraint from the table above\n", "#SBATCH --time=01:00:00 # Time limit hrs:min:sec\n", "#SBATCH --output=my_job_%j.log # Standard output and error log\n", "\n", "pwd; hostname; date\n", "\n", "date\n", "\n", "sleep 10\n", "```\n", "\n", "Once you have your script, make it executable and submit it.\n", "\n", "```\n", "chmod 777 ./my-job.sh\n", "sbatch ./my-job.sh\n", "```" ] }, { "cell_type": "markdown", "id": "d36c38f9-e8fe-4371-9fd3-78690c4f87da", "metadata": {}, "source": [ "### Troubleshooting\n", "\n", "Submitting jobs to SLURM on AWS is slightly different than submitting jobs to an in-house data center. You do not specify memory; instead, you choose the instance type by setting a constraint on the EC2 instance type. 
" ] }, { "cell_type": "markdown", "id": "5c92c961-2c31-4e61-9676-a9f2dc907a4e", "metadata": {}, "source": [ "## Additional Information\n", "\n", "SLURM provides much more information than given by the pcluster-helper utility.\n", "\n", "**Cluster Info**\n", "\n", "```bash\n", "sinfo\n", "```\n", "\n", "**Job Info**\n", "\n", "```bash\n", "squeue\n", "```\n", "\n", "**Job Info for a Single User**\n", "\n", "```bash\n", "squeue -u $USER\n", "```\n", "\n", "**Show detailed job information**\n", "\n", "```bash\n", "scontrol show job \n", "```" ] }, { "cell_type": "markdown", "id": "13ae24b3-74a7-4c56-856b-6d5fbb4bd131", "metadata": {}, "source": [ "*These are examples from this particular cluster. Your output will be slightly different depending on your cluster configuration*" ] }, { "cell_type": "code", "execution_count": 4, "id": "3695d533-f77f-42d9-82bd-ae247538b1b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)\n", " 185 mem spawner- user2135 R 44:06 1 mem-dy-r6i2xlarge-1\n" ] } ], "source": [ "squeue -u $USER" ] }, { "cell_type": "code", "execution_count": 5, "id": "888a9287-c911-448d-a8e8-fb1472c2030c", "metadata": {}, "outputs": [], "source": [ "# scontrol show job some_job_id" ] }, { "cell_type": "code", "execution_count": 6, "id": "d8051163-c7bc-4739-904e-2f5973a31f8e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PARTITION AVAIL TIMELIMIT NODES STATE NODELIST\n", "basic* up infinite 496 idle~ basic-dy-t2medium-[1-100],basic-dy-t2micro-[1-100],basic-dy-t3alarge-[5-100],basic-dy-t3axlarge-[1-100],basic-dy-t32xlarge-[1-100]\n", "basic* up infinite 4 idle basic-dy-t3alarge-[1-4]\n", "mem up infinite 148 idle~ mem-dy-c6a8xlarge-[1-50],mem-dy-m5a4xlarge-[1-50],mem-dy-r6i2xlarge-[3-50]\n", "mem up infinite 2 alloc mem-dy-r6i2xlarge-[1-2]\n", "gpu up infinite 4 idle~ gpu-dy-p32xlarge-[1-2],gpu-dy-p38xlarge-[1-2]\n" ] } ], "source": 
[ "sinfo" ] }, { "cell_type": "code", "execution_count": null, "id": "26ee00fd-46bc-46bc-94ba-37eefc3f0025", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "9f0a2a11-2df4-47f8-a39a-8e5144a8b785", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Bash", "language": "bash", "name": "bash" }, "language_info": { "codemirror_mode": "shell", "file_extension": ".sh", "mimetype": "text/x-sh", "name": "bash" } }, "nbformat": 4, "nbformat_minor": 5 }