Skip to content

Commit 1aa640d

Browse files
Peter Johnson
authored and committed
Add tokenizers to dockerfile
1 parent 257ed47 commit 1aa640d

1 file changed

Lines changed: 1 addition & 2 deletions

File tree

Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
2222
find /app/.venv -name "*.md" -delete && \
2323
find /app/.venv -name "*.txt" -delete &&\
2424
# Download NLTK corpora into the venv so it's cached ---
25-
/app/.venv/bin/python -m nltk.downloader -d /app/.venv/nltk_data brown reuters gutenberg webtext
25+
/app/.venv/bin/python -m nltk.downloader -d /app/.venv/nltk_data brown reuters gutenberg webtext punkt punkt_tab
2626

2727
ENV NLTK_DATA=/app/.venv/nltk_data
2828

@@ -32,7 +32,6 @@ ENV VIRTUAL_ENV=/app/.venv \
3232
PATH="/app/.venv/bin:$PATH"
3333
ENV NLTK_DATA=/app/.venv/nltk_data
3434

35-
3635
# Copy the cleaned virtual environment
3736
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
3837

0 commit comments

Comments
 (0)