-
Notifications
You must be signed in to change notification settings - Fork 653
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Qing Lan committed May 16, 2023
1 parent 6372b42, commit 790fce5
Showing 2 changed files with 133 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Workflow that builds the Triton server library and the FasterTransformer | ||
# binaries and uploads both to S3. | ||
name: Build Triton Server and FasterTransformers | ||
|
||
on: | ||
  # Started by hand only; both inputs below are required and have defaults. | ||
  workflow_dispatch: | ||
    inputs: | ||
      # Used as the git ref for triton-inference-server/server and in the upload path. | ||
      triton: | ||
        description: 'triton branch version' | ||
        required: true | ||
        default: 'r23.04' | ||
      # Passed to tools/scripts/build_ft_deps.sh and used in the upload path. | ||
      fastertransformer: | ||
        description: 'fastertransformer branch/tag version' | ||
        required: true | ||
        default: 'main' | ||
|
||
jobs: | ||
build-triton: | ||
if: github.repository == 'deepjavalibrary/djl' | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
with: | ||
repository: triton-inference-server/server | ||
ref: ${{ github.event.inputs.triton }} | ||
- name: Set up Python3 | ||
uses: actions/setup-python@v4 | ||
with: | ||
python-version: '3.x' | ||
- name: Build Triton Binary | ||
run: | | ||
python3 build.py --enable-logging --enable-metrics --enable-stats --enable-cpu-metrics | ||
- name: Configure AWS Credentials | ||
uses: aws-actions/configure-aws-credentials@v2 | ||
with: | ||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
aws-region: us-east-2 | ||
- name: Copy files to S3 with the AWS CLI | ||
run: | | ||
aws s3 cp build/install/lib/libtritonserver.so s3://djl-ai/publish/tritonserver/${{ github.event.inputs.triton }}/ | ||
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tritonserver/${{ github.event.inputs.triton }}/*" | ||
create-runner: | ||
if: github.repository == 'deepjavalibrary/djl' | ||
runs-on: [ self-hosted, scheduler ] | ||
steps: | ||
- name: Create new CPU instance | ||
id: create_cpu | ||
run: | | ||
cd /home/ubuntu/djl_benchmark_script/scripts | ||
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | ||
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | ||
--fail \ | ||
| jq '.token' | tr -d '"' ) | ||
./start_instance.sh action_cpu $token djl-serving | ||
outputs: | ||
cpu_instance_id: ${{ steps.create_cpu.outputs.action_cpu_instance_id }} | ||
|
||
|
||
build-fastertransformer: | ||
if: github.repository == 'deepjavalibrary/djl' | ||
runs-on: [ self-hosted, cpu ] | ||
container: deepjavalibrary/djl-serving:fastertransformer-nightly | ||
timeout-minutes: 60 | ||
needs: create-runner | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Build FasterTransformers | ||
run: | | ||
tools/scripts/build_ft_deps.sh ${{ github.event.inputs.fastertransformer }} ${{ github.event.inputs.triton }} | ||
- name: Configure AWS Credentials | ||
uses: aws-actions/configure-aws-credentials@v2 | ||
with: | ||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
aws-region: us-east-2 | ||
- name: Copy files to S3 with the AWS CLI | ||
run: | | ||
aws s3 sync /tmp/binaries/ s3://djl-ai/publish/fastertransformer/${{ github.event.inputs.fastertransformer }}/ | ||
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/fastertransformer/${{ github.event.inputs.fastertransformer }}/*" | ||
stop-runner: | ||
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }} | ||
runs-on: [ self-hosted, scheduler ] | ||
needs: [ create-runner, build-fastertransformer] | ||
steps: | ||
- name: Stop all instances | ||
run: | | ||
cd /home/ubuntu/djl_benchmark_script/scripts | ||
instance_id=${{ needs.create-runner.outputs.cpu_instance_id }} | ||
./stop_instance.sh $instance_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env bash | ||
# | ||
# Builds the FasterTransformer Triton backend and the FasterTransformer | ||
# PyTorch op library under /tmp and copies the resulting shared objects | ||
# to /tmp/binaries. | ||
# | ||
# Arguments: | ||
#   $1  FasterTransformer branch or tag to clone | ||
#   $2  Triton version used for the common/core/backend repo tags | ||
|
||
set -ex | ||
|
||
# Abort with a usage hint when an argument is missing instead of cloning | ||
# with an empty ref. | ||
FT_VERSION="${1:?usage: $0 FT_VERSION NVIDIA_TRITON_SERVER_VERSION}" | ||
NVIDIA_TRITON_SERVER_VERSION="${2:?usage: $0 FT_VERSION NVIDIA_TRITON_SERVER_VERSION}" | ||
|
||
# Refresh the package index first so the install does not fail on an image | ||
# whose apt lists are stale or have been removed. | ||
apt-get update | ||
apt-get install -y rapidjson-dev | ||
|
||
pushd /tmp | ||
|
||
git clone https://github.com/NVIDIA/FasterTransformer.git -b "${FT_VERSION}" | ||
|
||
# Exported for the backend build below. | ||
export FT_DIR=/tmp/FasterTransformer | ||
mkdir -p /tmp/binaries | ||
|
||
# Build FasterTransformer Triton library | ||
git clone https://github.com/triton-inference-server/fastertransformer_backend.git | ||
mkdir -p fastertransformer_backend/build | ||
cd fastertransformer_backend/build | ||
cmake \ | ||
  -D CMAKE_EXPORT_COMPILE_COMMANDS=1 \ | ||
  -D CMAKE_BUILD_TYPE=Release \ | ||
  -D ENABLE_FP8=OFF \ | ||
  -D CMAKE_INSTALL_PREFIX=/opt/tritonserver \ | ||
  -D TRITON_COMMON_REPO_TAG="${NVIDIA_TRITON_SERVER_VERSION}" \ | ||
  -D TRITON_CORE_REPO_TAG="${NVIDIA_TRITON_SERVER_VERSION}" \ | ||
  -D TRITON_BACKEND_REPO_TAG="${NVIDIA_TRITON_SERVER_VERSION}" \ | ||
  .. | ||
make -j$(nproc) | ||
cp lib/*.so /tmp/binaries/ | ||
cd ../../ | ||
|
||
# Build FasterTransformer TH Ops library | ||
mkdir -p FasterTransformer/build | ||
cd FasterTransformer/build | ||
git submodule init && git submodule update | ||
cmake -DCMAKE_BUILD_TYPE=Release -DSM=70,75,80,86 -DBUILD_PYT=ON -DBUILD_MULTI_GPU=ON .. | ||
make -j$(nproc) | ||
cp lib/libth_transformer.so /tmp/binaries/ | ||
cd ../../ | ||
|
||
popd |