diff --git a/bin/compile b/bin/compile
index 52293a07847127c4cd3ab701ddb7016250bca36f..bda4df3472584b43ed4659430f85c797054a3acc 100755
--- a/bin/compile
+++ b/bin/compile
@@ -193,6 +193,9 @@ source $BIN_DIR/steps/gdal
 # Install dependencies with Pip (where the magic happens).
 source $BIN_DIR/steps/pip-install
 
+# Support for NLTK corpora.
+sub-env $BIN_DIR/steps/nltk
+
 # Support for pip install -e.
 rm -fr $BUILD_DIR/.heroku/src
 deep-cp /app/.heroku/src $BUILD_DIR/.heroku/src
diff --git a/bin/steps/nltk b/bin/steps/nltk
new file mode 100755
index 0000000000000000000000000000000000000000..8cb76a1c490f750dc3446bc9a169dd9268eb8f1f
--- /dev/null
+++ b/bin/steps/nltk
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# This script serves as the NLTK build step of the
+# [**Python Buildpack**](https://github.com/heroku/heroku-buildpack-python)
+# compiler.
+#
+# A [buildpack](https://devcenter.heroku.com/articles/buildpacks) is an
+# adapter between a Python application and Heroku's runtime.
+#
+# This script is invoked by [`bin/compile`](/).
+#
+# If `python -m nltk.downloader` is runnable and the application ships an
+# `nltk.txt` file (whitespace-separated NLTK package identifiers), the listed
+# packages are downloaded into `$BUILD_DIR/.heroku/python/nltk_data` and
+# `NLTK_DATA` is recorded for the application via `set-env`.
+
+# Syntax sugar.
+source "$BIN_DIR/utils"
+
+bpwatch start nltk_download
+
+# NOTE(review): these point at $BUILD_DIR/nltk_data, not at the download
+# directory passed to `-d` below -- confirm whether they are still needed.
+export NLTK_DATA_DIR="$BUILD_DIR/nltk_data"
+export NLTK_DATA="$BUILD_DIR/nltk_data"
+
+# Check that nltk was installed by pip; otherwise there is nothing to do.
+if python -m nltk.downloader -h >/dev/null 2>&1; then
+  puts-step "Downloading NLTK corpora..."
+  nltk_packages_definition="$BUILD_DIR/nltk.txt"
+  if [ -f "$nltk_packages_definition" ]; then
+    # Split on any whitespace (CR included, for CRLF files) into an array so
+    # that entries are never glob-expanded by the shell.
+    read -r -a nltk_packages <<< "$(tr '\r\n' '  ' < "$nltk_packages_definition")"
+    if [ "${#nltk_packages[@]}" -gt 0 ]; then
+      puts-step "Downloading NLTK packages: ${nltk_packages[*]}"
+      python -m nltk.downloader -d "$BUILD_DIR/.heroku/python/nltk_data" "${nltk_packages[@]}" | indent
+      set-env NLTK_DATA "/app/.heroku/python/nltk_data"
+    else
+      puts-warn "nltk.txt is empty, not downloading any corpora"
+    fi
+  else
+    puts-warn "nltk.txt not found, not downloading any corpora"
+  fi
+else
+  puts-warn "nltk not apparently installed, not downloading packages"
+fi
+
+bpwatch stop nltk_download