Skip to content

Commit

Permalink
Merge pull request #213 from nltk/pickle-patch
Browse files Browse the repository at this point in the history
Convert the pickle tagsets dictionary to json
  • Loading branch information
alvations authored Jul 5, 2024
2 parents 6651c03 + fd5bcb6 commit 3b48d69
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 0 deletions.
1 change: 1 addition & 0 deletions collections/all-nltk.xml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="tagsets_json" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
Expand Down
1 change: 1 addition & 0 deletions collections/all.xml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="tagsets_json" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
Expand Down
3 changes: 3 additions & 0 deletions index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
<package id="swadesh" name="Swadesh Wordlists" webpage="http://en.wiktionary.org/wiki/Appendix:Swadesh_list" license="GNU Free Documentation License" unzip="1" unzipped_size="39998" size="22828" checksum="6612ccb71f327e85780dc7813dee40f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/swadesh.zip" />
<package id="switchboard" name="Switchboard Corpus Sample" sample="True" license="Permission is granted for use of this material in accordance with the Open Content License [http://opencontent.org/opl.shtml]. This corpus contains transcripts and annotations for 36 calls from the Switchboard Corpus [http://www.ldc.upenn.edu/Catalog/LDC93S7.html]." unzip="1" unzipped_size="2541179" size="791161" checksum="878df010a9f2c2d0a6546a8365f10595" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/switchboard.zip" />
<package id="tagsets" name="Help on Tagsets" author="UCREL, Lancaster University" languages="English" unzip="1" unzipped_size="79723" size="34531" checksum="e15834e0dd89b107925af6bb11a8eaa4" subdir="help" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets.zip" />
<!-- JSON variant of the "tagsets" help package; the display name carries a "(JSON)" suffix so the two entries can be told apart in package listings. -->
<!-- NOTE(review): unzipped_size="0" alongside size="742" looks inconsistent for an archive that is unpacked (unzip="1"); confirm both sizes and the checksum against the published tagsets_json.zip. -->
<package id="tagsets_json" name="Help on Tagsets (JSON)" author="UCREL, Lancaster University" languages="English" unzip="1" unzipped_size="0" size="742" checksum="bc2b6c611e9a20c43f4b204edcf4e2ad" subdir="help" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets_json.zip" />
<package id="timit" name="TIMIT Corpus Sample" sample="True" license="This corpus sample is Copyright 1993 Linguistic Data Consortium, and is distributed under the terms of the Creative Commons Attribution, Non-Commercial, ShareAlike license. http://creativecommons.org/" webpage="http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC93S1" unzip="1" unzipped_size="31932925" size="22251869" checksum="34c047c4749a811287f2c652104d7849" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/timit.zip" />
<package id="toolbox" name="Toolbox Sample Files" unzip="1" unzipped_size="829593" size="250616" checksum="26657c1b8b5f5afdc3d5d754393a9216" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/toolbox.zip" />
<package id="treebank" name="Penn Treebank Sample" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="5963497" size="1740034" checksum="78c24a97940c2504d0ad35dd3f8a560b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip" />
Expand Down Expand Up @@ -213,6 +214,7 @@
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="tagsets_json" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
Expand Down Expand Up @@ -423,6 +425,7 @@
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="tagsets_json" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
Expand Down
Binary file added packages/help/tagsets_json.zip
Binary file not shown.

0 comments on commit 3b48d69

Please sign in to comment.