diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 6ed64ff15..722a13806 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -9,12 +9,16 @@ on:
       - '**.go'
       - 'Makefile'
       - 'go.**'
+      - 'pkg/**/*.sh'
+      - 'pkg/**/*.envd'
   pull_request:
     paths:
       - '.github/workflows/CI.yml'
       - '**.go'
       - 'Makefile'
       - 'go.**'
+      - 'pkg/**/*.sh'
+      - 'pkg/**/*.envd'
   merge_group:
   workflow_dispatch:
 
@@ -51,7 +55,7 @@ jobs:
           args: --timeout=5m
           version: latest
           # Ref https://github.com/golangci/golangci-lint-action/issues/244
-          skip-pkg-cache: true
+          skip-cache: true
   test:
     name: test
     env:
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index d115248d5..b59f892f8 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -30,7 +30,23 @@ jobs:
           args: --timeout=5m
           version: latest
           # Ref https://github.com/golangci/golangci-lint-action/issues/244
-          skip-pkg-cache: true
+          skip-cache: true
+  build:
+    name: build
+    if: github.repository == 'tensorchord/envd'
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, macos-latest ]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: 'stable'
+      - name: Build
+        run: make
   test:
     name: test
     if: github.repository == 'tensorchord/envd'
@@ -54,21 +70,13 @@ jobs:
           git diff --cached --exit-code || (echo 'Please run "make generate" to verify generate' && exit 1);
       - name: Test
         run: make test
-      - name: Upload coverage report
-        uses: actions/upload-artifact@v4
-        with:
-          name: coverage-out
-          path: coverage.out
   e2e-cli:
     name: e2e-cli
     if: github.repository == 'tensorchord/envd'
     env:
       # Disable telemetry.
       ENVD_ANALYTICS: false
-    strategy:
-      matrix:
-        os: [ ubuntu-latest ]
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-22.04
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -82,21 +90,13 @@ jobs:
         run: make e2e-cli-test
         env:
           GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
-      - name: Upload coverage report
-        uses: actions/upload-artifact@v4
-        with:
-          name: e2e-cli-coverage-out
-          path: e2e-cli-coverage.out
   e2e-lang:
     name: e2e-lang
     if: github.repository == 'tensorchord/envd'
     env:
       # Disable telemetry.
       ENVD_ANALYTICS: false
-    strategy:
-      matrix:
-        os: [ ubuntu-latest ]
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-22.04
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -110,77 +110,13 @@ jobs:
         run: make e2e-lang-test
         env:
           GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
-      - name: Upload coverage report
-        uses: actions/upload-artifact@v4
-        with:
-          name: e2e-lang-coverage-out
-          path: e2e-lang-coverage.out
-  # notifies that all test jobs are finished.
-  report:
-    if: github.repository == 'tensorchord/envd'
-    needs:
-      - test
-      - e2e-cli
-      - e2e-lang
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v4
-      - name: Setup Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 'stable'
-      - name: Install bins
-        run: |
-          go install github.com/mattn/goveralls@latest
-          go install github.com/wadey/gocovmerge@latest
-      - name: Get coverage report
-        uses: actions/download-artifact@v4
-        with:
-          name: coverage-out
-          path: coverage.out
-      - name: Get cli e2e coverage report
-        uses: actions/download-artifact@v4
-        with:
-          name: e2e-cli-coverage-out
-          path: e2e-cli-coverage.out
-      - name: Get language e2e coverage report
-        uses: actions/download-artifact@v4
-        with:
-          name: e2e-lang-coverage-out
-          path: e2e-lang-coverage.out
-      # - name: Send coverage
-      #   env:
-      #     COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #   run: |
-      #     gocovmerge e2e-coverage.out coverage.out > final.out
-      #     goveralls -coverprofile=final.out -service=github
-  build:
-    name: build
-    if: github.repository == 'tensorchord/envd'
-    strategy:
-      matrix:
-        os: [ ubuntu-latest, macos-latest ]
-    runs-on: ${{ matrix.os }}
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v4
-      - name: Setup Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 'stable'
-      - name: Build
-        run: make
   e2e-doc:
     name: e2e-doc
     if: github.repository == 'tensorchord/envd'
     env:
       # Disable telemetry.
       ENVD_ANALYTICS: false
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-22.04
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -194,8 +130,3 @@ jobs:
         run: make e2e-doc-test
         env:
           GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
-      - name: Upload coverage report
-        uses: actions/upload-artifact@v4
-        with:
-          name: e2e-doc-coverage-out
-          path: e2e-doc-coverage.out
diff --git a/e2e/docs/testdata/julia_mnist/build.envd b/e2e/docs/testdata/julia_mnist/build.envd
index 71db1252e..511f4c252 100644
--- a/e2e/docs/testdata/julia_mnist/build.envd
+++ b/e2e/docs/testdata/julia_mnist/build.envd
@@ -1,7 +1,7 @@
 # syntax=v1
+
 def build():
     base(dev=True)
-
     install.julia()
     install.julia_packages(name=["Flux", "MLDatasets"])
-    runtime.command(commands={"julia-mnist": "julia mnist.jl"})
+    runtime.command(commands={"julia-mnist": "julia mlp_mnist.jl"})
diff --git a/e2e/docs/testdata/julia_mnist/mlp_mnist.jl b/e2e/docs/testdata/julia_mnist/mlp_mnist.jl
new file mode 100644
index 000000000..050e6dd79
--- /dev/null
+++ b/e2e/docs/testdata/julia_mnist/mlp_mnist.jl
@@ -0,0 +1,85 @@
+# https://github.com/FluxML/model-zoo/blob/master/vision/mlp_mnist/mlp_mnist.jl
+# License: MIT Copyright (c) 2017 by Flux contributors
+
+# Simple multi-layer perceptron, for the MNIST hand-written digits.
+# This example does not use a GPU, it's small enough not to need one.
+
+using Flux, MLDatasets, Statistics
+
+# Our model is very simple: Its one "hidden layer" has 32 "neurons" each connected to every input pixel.
+# Each has a sigmoid nonlinearity, and is connected to every "neuron" in the output layer.
+# Finally, softmax produces probabilities, i.e. positive numbers which add up to 1:
+
+model = Chain(Dense(28^2 => 32, sigmoid), Dense(32 => 10), softmax)
+
+#===== DATA =====#
+
+# Calling MLDatasets.MNIST() will download the dataset if necessary,
+# and return a struct containing it.
+# It takes a few seconds to read from disk each time, so do this once:
+
+train_data = MLDatasets.MNIST()  # i.e. split=:train
+test_data = MLDatasets.MNIST(split=:test)
+
+# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
+# We need a 2D array for our model. Let's combine the reshape needed with
+# other pre-processing, in a function:
+
+function simple_loader(data::MNIST; batchsize::Int=64)
+    x2dim = reshape(data.features, 28^2, :)
+    yhot = Flux.onehotbatch(data.targets, 0:9)
+    Flux.DataLoader((x2dim, yhot); batchsize, shuffle=true)
+end
+
+# train_data.targets is a 60000-element Vector{Int}, of labels from 0 to 9.
+# Flux.onehotbatch([0,1,9], 0:9) makes a matrix of 0 and 1.
+
+simple_loader(train_data)  # returns a DataLoader, with first element a tuple like this:
+
+x1, y1 = first(simple_loader(train_data));  # (784×64 Matrix{Float32}, 10×64 OneHotMatrix)
+
+model(x1)  # x1 is the right shape for our model
+
+y1  # y1 is the same shape as the model output.
+
+# @show Flux.crossentropy(model(x1), y1);  # This will be our loss function
+
+#===== ACCURACY =====#
+
+# We're going to log accuracy and loss during training. There's no advantage to
+# calculating these on minibatches, since MNIST is small enough to do it at once.
+
+function simple_accuracy(model, data::MNIST=test_data)
+    (x, y) = only(simple_loader(data; batchsize=length(data)))  # make one big batch
+    y_hat = model(x)
+    iscorrect = Flux.onecold(y_hat) .== Flux.onecold(y)  # BitVector
+    acc = round(100 * mean(iscorrect); digits=2)
+end
+
+# @show simple_accuracy(model);  # accuracy about 10%, on training data, before training!
+
+#===== TRAINING =====#
+
+# Make a dataloader using the desired batchsize:
+
+train_loader = simple_loader(train_data, batchsize = 256)
+
+# Initialise storage needed for the Adam optimiser, with our chosen learning rate:
+
+opt_state = Flux.setup(Adam(3e-4), model);
+
+# Then train for 10 epochs, printing out details as we go:
+
+for epoch in 1:10
+    loss = 0.0
+    for (x, y) in train_loader
+        # Compute the loss and the gradients:
+        l, gs = Flux.withgradient(m -> Flux.crossentropy(m(x), y), model)
+        # Update the model parameters (and the Adam momenta):
+        Flux.update!(opt_state, model, gs[1])
+        # Accumulate the mean loss, just for logging:
+        loss += l / length(train_loader)
+    end
+end
+
+print(simple_accuracy(model, test_data))
diff --git a/e2e/docs/testdata/julia_mnist/mnist.jl b/e2e/docs/testdata/julia_mnist/mnist.jl
deleted file mode 100644
index e4327ad3a..000000000
--- a/e2e/docs/testdata/julia_mnist/mnist.jl
+++ /dev/null
@@ -1,43 +0,0 @@
-using Flux, MLDatasets
-using Flux: train!, onehotbatch
-
-ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"
-
-# Load training data (images, labels)
-x_train, y_train = MLDatasets.MNIST(split=:train)[:]
-# Load test data (images, labels)
-x_test, y_test = MLDatasets.MNIST(split=:test)[:]
-# Convert grayscale to float
-x_train = Float32.(x_train)
-# Create labels batch
-y_train = Flux.onehotbatch(y_train, 0:9)
-
-model = Chain(
-    Dense(784, 256, relu),
-    Dense(256, 64, relu),
-    Dense(64, 10, relu),
-    softmax
-)
-
-loss(x, y) = Flux.Losses.logitcrossentropy(model(x), y)
-
-optimizer = ADAM(0.0001)
-
-parameters = Flux.params(model)
-# flatten() function converts array 28x28x60000 into 784x60000 (28*28x60000)
-train_data = [(Flux.flatten(x_train), Flux.flatten(y_train))]
-# Range in loop can be used smaller
-for i in 1:100
-    Flux.train!(loss, parameters, train_data, optimizer)
-end
-
-test_data = [(Flux.flatten(x_test), y_test)]
-accuracy = 0
-for i in 1:length(y_test)
-    global accuracy
-    if findmax(model(test_data[1][1][:, i]))[2] - 1 == y_test[i]
-        accuracy = accuracy + 1
-    end
-end
-
-print(accuracy / length(y_test))
\ No newline at end of file
diff --git a/pkg/lang/ir/v1/julia.sh b/pkg/lang/ir/v1/julia.sh
index 9d5c5c71d..c5ba65cc2 100644
--- a/pkg/lang/ir/v1/julia.sh
+++ b/pkg/lang/ir/v1/julia.sh
@@ -1,12 +1,18 @@
 set -o pipefail && \
-JULIA_URL="https://julialang-s3.julialang.org/bin/linux/x64/1.8/julia-1.8.5-linux-x86_64.tar.gz"; \
-SHA256SUM="e71a24816e8fe9d5f4807664cbbb42738f5aa9fe05397d35c81d4c5d649b9d05"; \
+UNAME_M="$(uname -m)" && \
+if [ "${UNAME_M}" = "x86_64" ]; then \
+    JULIA_URL="https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.8-linux-x86_64.tar.gz"; \
+    SHA256SUM="0410175aeec3df63173c15187f2083f179d40596d36fd3a57819cc5f522ae735"; \
+elif [ "${UNAME_M}" = "aarch64" ]; then \
+    JULIA_URL="https://julialang-s3.julialang.org/bin/linux/aarch64/1.10/julia-1.10.8-linux-aarch64.tar.gz"; \
+    SHA256SUM="8d63dd12595a08edc736be8d6c4fea1840f137b81c62079d970dbd1be448b8cd"; \
+fi && \
 wget "${JULIA_URL}" -O /tmp/julia.tar.gz && \
 echo "${SHA256SUM}  /tmp/julia.tar.gz" > /tmp/sha256sum && \
 sha256sum -c -s /tmp/sha256sum
 
 EXIT_CODE=$?
-if [ $EXIT_CODE -ne 0 ]; then 
+if [ $EXIT_CODE -ne 0 ]; then
     echo "CHECKSUM FAILED" && \
     rm /tmp/julia.tar.gz && \
     wget "${JULIA_URL}" -O /tmp/julia.tar.gz && \
@@ -14,4 +20,3 @@ if [ $EXIT_CODE -ne 0 ]; then
 else
     echo "CHECKSUM PASSED"
 fi
-
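
Reviewer sketch (not part of the patch): the rewritten pkg/lang/ir/v1/julia.sh selects the Julia 1.10.8 tarball by CPU architecture and retries the download once when the checksum check fails. The standalone script below mirrors that flow so it can be exercised outside an image build; the URLs and checksums are the ones added above, while the unsupported-architecture guard and the if/else around sha256sum are illustrative additions.

    # Pick the Julia tarball for the current architecture.
    UNAME_M="$(uname -m)"
    if [ "${UNAME_M}" = "x86_64" ]; then
        JULIA_URL="https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.8-linux-x86_64.tar.gz"
        SHA256SUM="0410175aeec3df63173c15187f2083f179d40596d36fd3a57819cc5f522ae735"
    elif [ "${UNAME_M}" = "aarch64" ]; then
        JULIA_URL="https://julialang-s3.julialang.org/bin/linux/aarch64/1.10/julia-1.10.8-linux-aarch64.tar.gz"
        SHA256SUM="8d63dd12595a08edc736be8d6c4fea1840f137b81c62079d970dbd1be448b8cd"
    else
        # Guard added for illustration; the patched script only handles the two arches.
        echo "unsupported architecture: ${UNAME_M}" >&2
        exit 1
    fi
    wget "${JULIA_URL}" -O /tmp/julia.tar.gz
    # Two spaces between hash and path: the checksum-file format sha256sum expects.
    echo "${SHA256SUM}  /tmp/julia.tar.gz" > /tmp/sha256sum
    # -s (silent) matches the patched script; GNU coreutils spells it --status.
    if sha256sum -c -s /tmp/sha256sum; then
        echo "CHECKSUM PASSED"
    else
        # Retry the download once on a corrupted tarball, as the patched script does.
        echo "CHECKSUM FAILED"
        rm /tmp/julia.tar.gz
        wget "${JULIA_URL}" -O /tmp/julia.tar.gz
        sha256sum -c -s /tmp/sha256sum
    fi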