# cloudformation-template.yml (6.3 KB)
  1. AWSTemplateFormatVersion: '2010-09-09'
  2. Description: 'CloudFormation template to create a GPU-enabled Jupyter notebook in SageMaker with an execution role and
  3. LLMs-from-scratch Repo'
  4. Parameters:
  5. NotebookName:
  6. Type: String
  7. Default: 'LLMsFromScratchNotebook'
  8. DefaultRepoUrl:
  9. Type: String
  10. Default: 'https://github.com/rasbt/LLMs-from-scratch.git'
  11. Resources:
  12. SageMakerExecutionRole:
  13. Type: AWS::IAM::Role
  14. Properties:
  15. AssumeRolePolicyDocument:
  16. Version: '2012-10-17'
  17. Statement:
  18. - Effect: Allow
  19. Principal:
  20. Service:
  21. - sagemaker.amazonaws.com
  22. Action:
  23. - sts:AssumeRole
  24. ManagedPolicyArns:
  25. - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
  26. - arn:aws:iam::aws:policy/AmazonBedrockFullAccess
  # Customer-managed KMS key; the notebook instance references it through
  # its KmsKeyId property.
  KmsKey:
    Type: 'AWS::KMS::Key'
    Properties:
      Description: KMS key for SageMaker notebook
      EnableKeyRotation: true
      KeyPolicy:
        Version: '2012-10-17'
        Statement:
          # Allows the account root principal every KMS action on this key.
          - Effect: Allow
            Principal:
              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
            Action: 'kms:*'
            Resource: '*'
  # Friendly alias for the key above, derived from the notebook name.
  KmsKeyAlias:
    Type: 'AWS::KMS::Alias'
    Properties:
      TargetKeyId: !Ref KmsKey
      AliasName: !Sub 'alias/${NotebookName}-kms-key'
  # Lifecycle configuration attached to the notebook instance.
  #   OnCreate - writes a setup script that builds a private Miniconda
  #              environment and launches it in the background, so the
  #              lifecycle hook itself returns immediately.
  #   OnStart  - once the setup-complete flag exists, registers every conda
  #              environment as a Jupyter kernel and restarts Jupyter.
  # NOTE(review): the configuration name is a fixed literal, so a second
  # stack from this template in the same account and region would presumably
  # collide on it - confirm whether that matters for your deployments.
  TensorConfigLifecycle:
    Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
    Properties:
      NotebookInstanceLifecycleConfigName: "TensorConfigv241128"
      OnCreate:
        - Content: !Base64 |
            #!/bin/bash
            set -e
            # Create a startup script that will run in the background
            cat << 'EOF' > /home/ec2-user/SageMaker/setup-environment.sh
            #!/bin/bash
            sudo -u ec2-user -i <<'INNEREOF'
            unset SUDO_UID
            # Install a separate conda installation via Miniconda
            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
            mkdir -p "$WORKING_DIR"
            wget https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh -O "$WORKING_DIR/miniconda.sh"
            bash "$WORKING_DIR/miniconda.sh" -b -u -p "$WORKING_DIR/miniconda"
            rm -rf "$WORKING_DIR/miniconda.sh"
            # Ensure we're using the Miniconda conda
            export PATH="$WORKING_DIR/miniconda/bin:$PATH"
            # Initialize conda
            "$WORKING_DIR/miniconda/bin/conda" init bash
            source ~/.bashrc
            # Create and activate environment
            KERNEL_NAME="tensorflow2_p39"
            PYTHON="3.9"
            # Single source of truth for the environment's bin directory
            ENV_BIN="$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin"
            "$WORKING_DIR/miniconda/bin/conda" create --yes --name "$KERNEL_NAME" python="$PYTHON"
            eval "$("$WORKING_DIR/miniconda/bin/conda" shell.bash activate "$KERNEL_NAME")"
            # Install CUDA toolkit and cuDNN
            "$WORKING_DIR/miniconda/bin/conda" install --yes cudatoolkit=11.8 cudnn
            # Install ipykernel
            "$ENV_BIN/pip" install --quiet ipykernel
            # Install PyTorch with CUDA support
            "$ENV_BIN/pip3" install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 \
              --index-url https://download.pytorch.org/whl/cu118
            # Install other packages
            # NOTE(review): three overlapping TensorFlow installs follow; the
            # final pip pin is presumably the one that wins - consider
            # consolidating. The extras spec is quoted so the shell cannot
            # treat the brackets as a glob pattern.
            "$ENV_BIN/pip" install 'tensorflow[gpu]'
            "$WORKING_DIR/miniconda/bin/conda" install --yes tensorflow-gpu
            "$ENV_BIN/pip" install tensorflow==2.15.0
            "$WORKING_DIR/miniconda/bin/conda" install --yes setuptools tiktoken tqdm numpy pandas psutil
            "$WORKING_DIR/miniconda/bin/conda" install -y jupyterlab==4.0
            "$ENV_BIN/pip" install matplotlib==3.7.1
            # Only flag setup as complete when the kernel can really be
            # registered. OnStart runs the same registration, so a flag left
            # behind by a failed install would break later starts.
            if "$ENV_BIN/python" -m ipykernel install --user --name "$KERNEL_NAME" --display-name "Custom ($KERNEL_NAME)"; then
              touch /home/ec2-user/SageMaker/setup-complete
            else
              echo "Environment setup failed; setup-complete flag not written. See the output above." >&2
              exit 1
            fi
            INNEREOF
            EOF
            # Make the script executable and run it in the background
            chmod +x /home/ec2-user/SageMaker/setup-environment.sh
            sudo -u ec2-user nohup /home/ec2-user/SageMaker/setup-environment.sh > /home/ec2-user/SageMaker/setup.log 2>&1 &
      OnStart:
        - Content: !Base64 |
            #!/bin/bash
            set -e
            # Check if setup is still running or not started
            if ! [ -f /home/ec2-user/SageMaker/setup-complete ]; then
              echo "Setup still in progress or not started. Check setup.log for details."
              exit 0
            fi
            sudo -u ec2-user -i <<'EOF'
            unset SUDO_UID
            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
            source "$WORKING_DIR/miniconda/bin/activate"
            for env in "$WORKING_DIR"/miniconda/envs/*; do
              # Skip the literal pattern left over when the glob matches nothing
              [ -d "$env" ] || continue
              BASENAME=$(basename "$env")
              source activate "$BASENAME"
              # One environment without ipykernel must not abort the whole
              # start-up script, so a failed registration is only a warning.
              python -m ipykernel install --user --name "$BASENAME" --display-name "Custom ($BASENAME)" \
                || echo "WARNING: could not register a kernel for $BASENAME" >&2
            done
            EOF
            echo "Restarting the Jupyter server.."
            CURR_VERSION=$(cat /etc/os-release)
            if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
              sudo initctl restart jupyter-server --no-wait
            else
              sudo systemctl --no-block restart jupyter-server.service
            fi
  # The notebook instance itself; it ties together the role, KMS key,
  # lifecycle configuration and default repository defined in this template.
  SageMakerNotebookInstance:
    Type: 'AWS::SageMaker::NotebookInstance'
    Properties:
      # Identity and sizing
      NotebookInstanceName: !Ref NotebookName
      InstanceType: ml.g4dn.xlarge
      PlatformIdentifier: notebook-al2-v2
      VolumeSizeInGB: 50
      # References to the other resources and parameters in this template
      RoleArn: !GetAtt SageMakerExecutionRole.Arn
      KmsKeyId: !GetAtt KmsKey.Arn
      LifecycleConfigName: !GetAtt TensorConfigLifecycle.NotebookInstanceLifecycleConfigName
      DefaultCodeRepository: !Ref DefaultRepoUrl
  131. Outputs:
  132. NotebookInstanceName:
  133. Description: The name of the created SageMaker Notebook Instance
  134. Value: !Ref SageMakerNotebookInstance
  135. ExecutionRoleArn:
  136. Description: The ARN of the created SageMaker Execution Role
  137. Value: !GetAtt SageMakerExecutionRole.Arn
  138. KmsKeyArn:
  139. Description: The ARN of the created KMS Key for the notebook
  140. Value: !GetAtt KmsKey.Arn