forked from nltk/nltk_data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.xml
533 lines (533 loc) · 62.4 KB
/
index.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="index.xsl" type="text/xsl"?>
<nltk_data>
<packages>
<package id="perluniprops" name="perluniprops: Index of Unicode Version 7.0.0 character properties in Perl" webpage="http://perldoc.perl.org/perluniprops.html" license="" unzip="1" unzipped_size="136038" size="100266" checksum="721ecf418efbfefb183d0559a7ef9f2d" subdir="misc" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/perluniprops.zip" />
<package id="mwa_ppdb" name="The monolingual word aligner (Sultan et al. 2015) subset of the Paraphrase Database." webpage="http://www.cis.upenn.edu/~ccb/ppdb/" license="Creative Commons Attribution 3.0 Unported (CC-BY)" unzip="1" unzipped_size="3657054" size="1594711" checksum="e5836f76779020b225ad6114372b954a" subdir="misc" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/mwa_ppdb.zip" />
<package id="punkt" name="Punkt Tokenizer Models" author="Jan Strunk" languages="Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Italian, Malayalam, Norwegian, Polish, Portuguese, Russian, Slovene, Spanish, Swedish, Turkish" unzip="1" unzipped_size="37245719" size="13905355" checksum="8dd1d8760a0976f96e5c262decd75165" subdir="tokenizers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip" />
<package id="rslp" name="RSLP Stemmer (Removedor de Sufixos da Lingua Portuguesa)" author="Viviane Moreira Orengo ([email protected]) and Christian Huyck" languages="Portuguese" unzip="1" unzipped_size="7269" size="3805" checksum="648798996224694251834699fa6e55f7" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/rslp.zip" />
<package id="porter_test" name="Porter Stemmer Test Files" unzip="1" unzipped_size="680060" size="200510" checksum="6af70bbc602aecd18aa0b9cfa7be2aa1" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/porter_test.zip" />
<package id="snowball_data" name="Snowball Data" languages="Danish, Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish, Turkish" webpage="https://github.com/snowballstem/snowball-data" unzip="0" unzipped_size="36360836" size="6785405" checksum="cba1cf17b887789e6df5f2c87c6e56fb" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/snowball_data.zip" />
<package id="maxent_ne_chunker" name="ACE Named Entity Chunker (Maximum entropy)" languages="English" unzip="1" unzipped_size="23604982" size="13404747" checksum="d577c2cd0fdae148b36d046b14eb48e6" subdir="chunkers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/chunkers/maxent_ne_chunker.zip" />
<package id="moses_sample" name="Moses Sample Models" webpage="http://www.statmt.org/moses/?n=Moses.SampleData" unzip="1" unzipped_size="10985045" size="10961490" checksum="715531d058ec253bd0683d0df23ec868" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/moses_sample.zip" />
<package id="bllip_wsj_no_aux" name="BLLIP Parser: WSJ Model" webpage="http://nlp.stanford.edu/~mcclosky/models/" unzip="1" unzipped_size="54298623" size="24516205" checksum="51d0c9c288b4f790bf255b5c9c3533ab" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/bllip_wsj_no_aux.zip" />
<package id="word2vec_sample" name="Word2Vec Sample" webpage="https://code.google.com/p/word2vec/" unzip="1" unzipped_size="138432415" size="49396025" checksum="d1d1a23377f9ab4c12d77c7a078318ac" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip" />
<package id="wmt15_eval" name="Evaluation data from WMT15" webpage="http://www.statmt.org/wmt15/" unzip="1" unzipped_size="1247631" size="383096" checksum="2067e40eaf94ccb632007b91073aa433" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/wmt15_eval.zip" />
<package id="spanish_grammars" name="Grammars for Spanish" author="Kepa Sarasola" languages="Spanish" unzip="1" unzipped_size="3980" size="4047" checksum="12f66b8e22beadd6ed202e95453465af" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/spanish_grammars.zip" />
<package id="sample_grammars" name="Sample Grammars" author="" languages="English" unzip="1" unzipped_size="61718" size="20293" checksum="c4a2a01345d1e61c8febd8d498c5d2d6" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/sample_grammars.zip" />
<package id="large_grammars" name="Large context-free and feature-based grammars for parser comparison" webpage="http://www.informatics.sussex.ac.uk/research/groups/nlp/carroll/elsps.html" contact="John A. Carroll" license="See the individual grammar files" languages="English" unzip="1" unzipped_size="4115732" size="283747" checksum="135aa813bd721d59ae595d9d7f115dc8" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/large_grammars.zip" />
<package id="book_grammars" name="Grammars from NLTK Book" author="Ewan Klein" languages="English" unzip="1" unzipped_size="21179" size="9103" checksum="2e6bc2e5d678fc5d14e4c0747c69083e" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/book_grammars.zip" />
<package id="basque_grammars" name="Grammars for Basque" author="Kepa Sarasola" languages="Basque" unzip="1" unzipped_size="5550" size="4704" checksum="0e3518cb2aeb2600cb2841df7f035606" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/basque_grammars.zip" />
<package id="maxent_treebank_pos_tagger" name="Treebank Part of Speech Tagger (Maximum entropy)" languages="English" unzip="1" unzipped_size="17961132" size="10156853" checksum="e3b8a5353056073e164c5b06d0cc1fa7" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/maxent_treebank_pos_tagger.zip" />
<package id="averaged_perceptron_tagger" name="Averaged Perceptron Tagger" languages="English" unzip="1" unzipped_size="6138625" size="2526731" checksum="05c91d607ee1043181233365b3f76978" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip" />
<package id="averaged_perceptron_tagger_ru" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" unzip="1" unzipped_size="23247411" size="8628828" checksum="f7051368e4aff6718f8b38c1362dfdb1" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_ru.zip" />
<package id="universal_tagset" name="Mappings to the Universal Part-of-Speech Tagset" author="Slav Petrov" license="CC-BY-SA-4.0" webpage="https://github.com/slavpetrov/universal-pos-tags" unzip="1" unzipped_size="37147" size="19095" checksum="ba5a69f2148a8cea6fb5084585e20890" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/universal_tagset.zip" />
<package id="vader_lexicon" name="VADER Sentiment Lexicon" author="C.J. Hutto and Eric Gilbert" webpage="https://github.com/cjhutto/vaderSentiment" license="MIT License" unzip="0" unzipped_size="434147" size="90486" checksum="8b3824e2c39b655dd225fb266c8bea53" subdir="sentiment" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/sentiment/vader_lexicon.zip" />
<package id="lin_thesaurus" name="Lin's Dependency Thesaurus" author="Dekang Lin" webpage="http://webdocs.cs.ualberta.ca/~lindek/downloads.htm" license="Distributed with permission of Dekang Lin" unzip="1" unzipped_size="210421609" size="89154019" checksum="288cc15e4ed257c8598d6f7a30199db9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/lin_thesaurus.zip" />
<package id="movie_reviews" name="Sentiment Polarity Dataset Version 2.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2004 Bo Pang and Lillian Lee" webpage="http://www.cs.cornell.edu/people/pabo/movie-review-data/" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" unzip="1" unzipped_size="7790571" size="4004848" checksum="155de2b77c6834dd8eea7cbe88e93acb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/movie_reviews.zip" />
<package id="problem_reports" name="Problem Report Corpus" webpage="http://www.cs.cmu.edu/~marmalade/reports.html" author="Andrew Ko, Carnegie Mellon University" unzip="1" unzipped_size="3467763" size="1032942" checksum="8781ace4c0a181c5875cdbfc01e895fb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/problem_reports.zip" />
<package id="pros_cons" name="Pros and Cons" author="Bing Liu" copyright="Copyright (C) 2008 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="2921218" size="746276" checksum="c4c7e61fb4d57a2f6c95317194da0f17" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pros_cons.zip" />
<package id="masc_tagged" name="MASC Tagged Corpus" copyright="Copyright (C) 2014 American National Corpus" author="Nancy Ide" license="This data may be used for the purposes of linguistic education, research, and development, including commercial development." webpage="http://www.anc.org/" unzip="0" unzipped_size="4963879" size="1602143" checksum="a03d3ae8c6c2a1707885066e4d62582a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/masc_tagged.zip" />
<package id="sentence_polarity" name="Sentence Polarity Dataset v1.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2005 Bo Pang and Lillian Lee" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.cornell.edu/People/pabo/people/pabo/movie-review-data" unzip="1" unzipped_size="1241127" size="490256" checksum="5cdc0cae7f558040d050c90eb2b72e97" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentence_polarity.zip" />
<package id="webtext" name="Web Text Corpus" unzip="1" unzipped_size="1726918" size="646297" checksum="6c7680030aae5c997b1370f832545c6a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/webtext.zip" />
<package id="nps_chat" name="NPS Chat" author="Craig Martell ([email protected])" webpage="http://faculty.nps.edu/cmartell/NPSChat.htm" license="This corpus is distributed solely for non-commercial, non-profit educational and research use. It is a derivative compilation work of multiple works whose copyrights are held by the respective original authors." unzip="1" unzipped_size="2578726" size="301366" checksum="72d1b905ba2be48d711690b012856c79" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nps_chat.zip" />
<package id="city_database" name="City Database" note="A very small database of information about cities" unzip="1" unzipped_size="4096" size="1708" checksum="29cbf1aa02ad8abc72dd955fe74f882c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/city_database.zip" />
<package id="europarl_raw" name="Sample European Parliament Proceedings Parallel Corpus" author="Philipp Koehn, University of Edinburgh" webpage="http://www.statmt.org/europarl" unzip="1" unzipped_size="41396100" size="12594977" checksum="7621d5675990b1decc012c823716ee76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/europarl_raw.zip" />
<package id="biocreative_ppi" name="BioCreAtIvE (Critical Assessment of Information Extraction Systems in Biology)" webpage="http://www.mitre.org/public/biocreative/" copyright="Public Domain (not copyrighted)" license="Public Domain" unzip="1" unzipped_size="1537086" size="223566" checksum="d3be36b53ab201372f1cd63ffc75e9a9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/biocreative_ppi.zip" />
<package id="verbnet3" name="VerbNet Lexicon, Version 3.3" version="3.3" author="Karin Kipper-Schuler" webpage="https://verbs.colorado.edu/verbnet/" license="Distributed with permission of the author." unzip="1" unzipped_size="3723345" size="482025" checksum="60efc5ed90ab8a18ef4a436e4c39ffbf" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet3.zip" />
<package id="pe08" name="Cross-Framework and Cross-Domain Parser Evaluation Shared Task" version="Release 3 (20 April 2008)" webpage="http://www-tsujii.is.s.u-tokyo.ac.jp/pe08-st/" license="Distributed with permission" unzip="1" unzipped_size="296619" size="80735" checksum="e72135042dc48772acad309a6adbb6f0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pe08.zip" />
<package id="pil" name="The Patient Information Leaflet (PIL) Corpus" version="Version 2.0 (31 March 2006)" webpage="http://mcs.open.ac.uk/nlg/old_projects/pills/corpus/" license="Distributed with permission" unzip="1" unzipped_size="4170899" size="1510205" checksum="d07b2ca7b5b351a24f4db8ae8fbc9e98" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pil.zip" />
<package id="crubadan" name="Crubadan Corpus" copyright="Copyright (C) 2010 Kevin Scannell" author="Kevin Scannell" license="GPLv3" webpage="http://borel.slu.edu/crubadan/" unzip="1" unzipped_size="11256183" size="5288655" checksum="3cc831382dec41b8d9a06d93ef300352" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/crubadan.zip" />
<package id="gutenberg" name="Project Gutenberg Selections" webpage="https://gutenberg.org/" license="public domain" copyright="public domain" unzip="1" unzipped_size="11802669" size="4251829" checksum="48c9c8605cd70b0230687557ee543633" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gutenberg.zip" />
<package id="propbank" name="Proposition Bank Corpus 1.0" contact="Martha Palmer" webpage="http://verbs.colorado.edu/~mpalmer/projects/ace.html" license="Distributed with permission" unzip="0" unzipped_size="18831005" size="5323498" checksum="2397782c6e6f46c9657f85db8a5421f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/propbank.zip" />
<package id="machado" name="Machado de Assis -- Obra Completa" author="Machado de Assis" license="Public Domain" webpage="http://machado.mec.gov.br/" unzip="0" unzipped_size="14855338" size="6151774" checksum="d186f7d6715479a8bec48b8b8030858e" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/machado.zip" />
<package id="state_union" name="C-Span State of the Union Address Corpus" webpage="http://www.c-span.org/executive/stateoftheunion.asp" copyright="public domain" license="public domain" unzip="1" unzipped_size="2073917" size="808757" checksum="044f2d20c592b17a26ac0102111833c9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/state_union.zip" />
<package id="twitter_samples" name="Twitter Samples" copyright="Copyright (C) 2015 Twitter, Inc" license="Must be used subject to Twitter Developer Agreement (https://dev.twitter.com/overview/terms/agreement)" note="Sample of Tweets collected from the Twitter APIs, observing the 50k limit required by https://dev.twitter.com/overview/terms/policy#6._Be_a_Good_Partner_to_Twitter" unzip="1" unzipped_size="122350791" size="16007673" checksum="02fc79b5adc0357bc1e14747246fd3c1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/twitter_samples.zip" />
<package id="semcor" name="SemCor 3.0" author="Rada Mihalcea ([email protected])" webpage="http://www.cse.unt.edu/~rada/downloads.html#semcor" license="You are granted permission to use, copy, modify and distribute this database for any purpose and without fee and royalty is hereby granted, provided that you agree to comply with the Princeton copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the database, including modifications that you make for internal use or for distribution. See semcor/README for more information." unzip="0" unzipped_size="37425596" size="4397021" checksum="46c095f0ab7090132567f87252af724f" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/semcor.zip" />
<package id="wordnet31" name="Wordnet 3.1" version="3.1" license="Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution.... [see webpage for full license]" copyright="WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="http://wordnet.princeton.edu/" unzip="0" unzipped_size="37411975" size="11058667" checksum="d3392d6facef35433ffcef838b47cae1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet31.zip" />
<package id="extended_omw" name="Extended Open Multilingual WordNet" copyright="Copyright (C) 2013 Francis Bond and Ryan Foster" license="CC by SA 3.0 Licence (for data from Wiktionary) and Unicode, Inc. Licence Agreement (for data from CLDR)" webpage="http://compling.hss.ntu.edu.sg/omw/summx.html" unzip="0" unzipped_size="36087752" size="11251284" checksum="8cc3931b20fdc2a2fe1ed9d42567d51b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/extended_omw.zip" />
<package id="names" name="Names Corpus, Version 1.3 (1994-03-29)" copyright="Copyright (C) 1991 Mark Kantrowitz" author="Mark Kantrowitz and Bill Ross" license="You may use the lists of names for any purpose, so long as credit is given in any published work. You may also redistribute the list if you provide the recipients with a copy of this README file. The lists are not in the public domain (I retain the copyright on the lists) but are freely redistributable. If you have any additions to the lists of names, I would appreciate receiving them." webpage="http://www-2.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/names/" unzip="1" unzipped_size="56572" size="21326" checksum="93844d7c995ad28f40528c08a3430175" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/names.zip" />
<package id="ptb" name="Penn Treebank" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a stub for the full Penn Treebank Corpus version 3." unzip="1" unzipped_size="63036" size="6289" checksum="7b633a1b7770279eab00bc1108769c67" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ptb.zip" />
<package id="nombank.1.0" name="NomBank Corpus 1.0" contact="Adam Meyers" webpage="http://nlp.cs.nyu.edu/meyers/NomBank.html" license="Distributed with permission" unzip="0" unzipped_size="42315496" size="6728397" checksum="57afdc46230ea33208e4e277de24765b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nombank.1.0.zip" />
<package id="floresta" name="Portuguese Treebank" license="Non-commercial use only" webpage="http://www.linguateca.pt/Floresta/" unzip="1" unzipped_size="16414136" size="1882021" checksum="de5f1df09949f080e0f616f0bc55967d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/floresta.zip" />
<package id="comtrans" name="ComTrans Corpus Sample" author="Reinhard Rapp" webpage="http://www.fask.uni-mainz.de/user/rapp/comtrans/" unzip="0" unzipped_size="35387522" size="11904518" checksum="8e1e34e2f052d8188fd877b2c821b42d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comtrans.zip" />
<package id="knbc" name="KNB Corpus (Annotated blog corpus)" webpage="http://lilyx.net/pages/nltkjapanesecorpus.html" license="Freely re-distributable under the same license as the original KNB Corpus." unzip="0" unzipped_size="23601139" size="8760788" checksum="992f8a3647f333e28a9958eba4bd67c7" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/knbc.zip" />
<package id="mac_morpho" name="MAC-MORPHO: Brazilian Portuguese news text with part-of-speech tags" webpage="http://www.nilc.icmc.usp.br/lacioweb/" license="Distributed with permission of Núcleo Interinstitucional de Lingüística Computacional (NILC), Universidade de São Paulo (USP) in São Carlos, Universidade Federal de São Carlos (UFSCar), Universidade Estadual Paulista (UNESP) of Araraquara." unzip="1" unzipped_size="10941402" size="3013904" checksum="cf216ae5b37cca24866909f8594c5395" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mac_morpho.zip" />
<package id="swadesh" name="Swadesh Wordlists" webpage="http://en.wiktionary.org/wiki/Appendix:Swadesh_list" license="GNU Free Documentation License" unzip="1" unzipped_size="39998" size="22828" checksum="6612ccb71f327e85780dc7813dee40f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/swadesh.zip" />
<package id="rte" name="PASCAL RTE Challenges 1, 2, and 3" webpage="http://www.pascal-network.org/Challenges/RTE/" unzip="1" unzipped_size="1279930" size="386303" checksum="ca21663daa326a3bb53001c3d82e62d6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/rte.zip" />
<package id="toolbox" name="Toolbox Sample Files" unzip="1" unzipped_size="829593" size="250616" checksum="26657c1b8b5f5afdc3d5d754393a9216" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/toolbox.zip" />
<package id="jeita" name="JEITA Public Morphologically Tagged Corpus (in ChaSen format)" webpage="http://lilyx.net/pages/nltkjapanesecorpus.html" license="Freely re-distributable under the same license as the original JEITA corpus. Each document retains its own license from Aozora bunko and Project Sugita Genpaku." unzip="0" unzipped_size="134170650" size="16531215" checksum="96e30423d6887fad17fc44f2f30d920d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/jeita.zip" />
<package id="product_reviews_1" name="Product Reviews (5 Products)" author="Bing Liu" copyright="Copyright (C) 2004 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="396548" size="141287" checksum="c13be66052027a4605ca456d7cda0917" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_1.zip" />
<package id="omw" name="Open Multilingual Wordnet" author="Francis Bond" license="Please consult the LICENSE files included with the individual Wordnets. Note that all permit redistribution." copyright="Please consult the copyright statements of the individual Wordnets" webpage="https://omwn.org/" unzip="0" unzipped_size="50269427" size="12110409" checksum="8e2adf0627365f0c51a05807737a5e5c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/omw.zip" />
<package id="wordnet2022" name="Open English Wordnet 2022" version="2022" license="This resource is derived from Princeton WordNet under the WordNet License and further developed under the Creative Commons Attribution 4.0 International License. You may share and adapt this resource providing attribution is given to both Princeton WordNet and the Open English WordNet team." copyright="Open English Wordnet 2022 Copyright 2022 by the Open English Wordnet team. WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="https://en-word.net/" unzip="1" unzipped_size="38474234" size="11353460" checksum="0b17803d0d8b85ad25b5037af83d6a1c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet2022.zip" />
<package id="sentiwordnet" name="SentiWordNet" copyright="Copyright (C) 2013 SentiWordNet Project" author="Stefano Baccianella, Andrea Esuli, and Fabrizio Sebastiani" license="Creative Commons Attribution ShareAlike 3.0 Unported license" webpage="http://sentiwordnet.isti.cnr.it/" unzip="1" unzipped_size="13591402" size="4686546" checksum="5043f00829b7db4dd5f21507e092b76a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentiwordnet.zip" />
<package id="product_reviews_2" name="Product Reviews (9 Products)" author="Bing Liu" copyright="Copyright (C) 2007 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="438549" size="170698" checksum="522134e8b91086473299c3800c4adbae" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_2.zip" />
<package id="abc" name="Australian Broadcasting Commission 2006" webpage="http://www.abc.net.au/" author="Australian Broadcasting Commission" unzip="1" unzipped_size="4054966" size="1487851" checksum="ffb36b67ff24cbf7daaf171c897eb904" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/abc.zip" />
<package id="wordnet2021" name="Open English Wordnet 2021" version="2021" license="This resource is derived from Princeton WordNet under the WordNet License and further developed under the Creative Commons Attribution 4.0 International License. You may share and adapt this resource providing attribution is given to both Princeton WordNet and the Open English WordNet team." copyright="Open English Wordnet 2021 Copyright 2021 by the Open English Wordnet team. WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="https://en-word.net/" unzip="0" unzipped_size="38408913" size="11332750" checksum="99da08a34df218457c3233d6a3dd31b8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet2021.zip" />
<package id="udhr2" name="Universal Declaration of Human Rights Corpus (Unicode Version)" webpage="http://unicode.org/udhr/" license="public domain" copyright="public domain" unzip="1" unzipped_size="5677920" size="1653975" checksum="e604482d2dc8dd2580af7d97c1bf0a80" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr2.zip" />
<package id="senseval" name="SENSEVAL 2 Corpus: Sense Tagged Text" contact="Ted Pedersen ([email protected])" license="Distributed with permission." webpage="http://www.senseval.org/" unzip="1" unzipped_size="16463075" size="2151350" checksum="bfc6a33c62ddc2ec24b02701a2f364ff" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/senseval.zip" />
<package id="words" name="Word Lists" webpage="http://en.wikipedia.org/wiki/Words_(Unix)" license="public domain" copyright="public domain" unzip="1" unzipped_size="2498552" size="757777" checksum="8594d9d5422e01d993dfbbc3f38d3ae5" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/words.zip" />
<package id="framenet_v15" name="FrameNet 1.5" author="Collin F. Baker" license="May be used for non-commercial purposes." webpage="http://framenet.icsi.berkeley.edu" unzip="1" unzipped_size="579133737" size="69337891" checksum="cf68365950b2f048bcb48619de81f50a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v15.zip" />
<package id="unicode_samples" name="Unicode Samples" note="A very small corpus used to demonstrate unicode encoding in chapter 10 of the book" unzip="1" unzipped_size="643" size="1212" checksum="d46699450dd2287f5c115d8c1a0819f1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/unicode_samples.zip" />
<package id="kimmo" name="PC-KIMMO Data Files" webpage="http://www.sil.org/pckimmo/" unzip="1" unzipped_size="814609" size="186958" checksum="68a8716e0233ad9c0ed0947952e4eb3e" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/kimmo.zip" />
<package id="framenet_v17" name="FrameNet 1.7" author="Collin F. Baker" license="Creative Commons Attribution 3.0 Unported License" webpage="http://framenet.icsi.berkeley.edu" unzip="1" unzipped_size="855026962" size="99207152" checksum="aaef1cfdcf37000cf2a5c562407fbddb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v17.zip" />
<package id="chat80" name="Chat-80 Data Files" copyright="Copyright (C) 1982 David Warren and Fernando Pereira" license="This program may be used, copied, altered or included in other programs only for academic purposes and provided that the authorship of the initial program is aknowledged. Use for commercial purposes without the previous written agreement of the authors is forbidden." author="David Warren and Fernando Pereira" webpage="http://www.cis.upenn.edu/~pereira/oldies.html" unzip="1" unzipped_size="63817" size="19209" checksum="6832873fe92996846ac5bb21c5d84eb8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/chat80.zip" />
<package id="qc" name="Experimental Data for Question Classification" author="Xin Li and Dan Roth, UIUC" webpage="http://l2r.cs.uiuc.edu/~cogcomp/Data/QA/QC/" unzip="1" unzipped_size="361090" size="125456" checksum="afd4145ac31cb8d7db715974b9b8b57a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/qc.zip" />
<package id="inaugural" name="C-Span Inaugural Address Corpus" copyright="public domain" license="public domain" unzip="1" unzipped_size="807436" size="346476" checksum="4e01cc9505ed7f9d04a330e67fb45509" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/inaugural.zip" />
<package id="wordnet" name="WordNet" version="3.0" license="Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution.... [see webpage for full license]" copyright="WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved." webpage="http://wordnet.princeton.edu/" unzip="0" unzipped_size="36353991" size="10775600" checksum="b3f38606f626e54c6f060548546f71f0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip" />
<package id="stopwords" name="Stopwords Corpus" webpage="ftp://ftp.cs.cornell.edu/pub/smart/english.stop and http://snowball.tartarus.org/ and others" unzip="1" unzipped_size="81407" size="34276" checksum="8726a900bca7083674536e2593686361" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" />
<package id="verbnet" name="VerbNet Lexicon, Version 2.1" version="2.1" author="Karin Kipper-Schuler" webpage="https://verbs.colorado.edu/verbnet/" license="Distributed with permission of the author." unzip="1" unzipped_size="2474526" size="323661" checksum="427dac60e4a94ae910248ccd9986a22a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet.zip" />
<package id="shakespeare" name="Shakespeare XML Corpus Sample" license="public domain" copyright="public domain" webpage="http://www.andrew.cmu.edu/user/akj/shakespeare/" sample="True" unzip="1" unzipped_size="1727210" size="475458" checksum="2332b32a7d83d657092ba4667c2c84c3" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/shakespeare.zip" />
<package id="ycoe" name="York-Toronto-Helsinki Parsed Corpus of Old English Prose" webpage="http://www.ota.ahds.ac.uk/" available="False" unzip="1" unzipped_size="277" size="477" checksum="6582cd98ca26c35d9c4eaaa4350ce8f3" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ycoe.zip" />
<package id="ieer" name="NIST IE-ER DATA SAMPLE" webpage="http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm" unzip="1" unzipped_size="541349" size="166156" checksum="34157f569624bc8d642ef8da5722b14a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ieer.zip" />
<package id="cess_cat" name="CESS-CAT Treebank" webpage="http://clic.ub.edu/cessece/" license="If you use these corpora for research, please cite thusly: CESS-Cat project (M. Antonia Martí, Mariona Taulé, Lluís Márquez, Manuel Bertran (2007) “CESS-ECE: A Multilingual and Multilevel Annotated Corpus” in http://www.lsi.upc.edu/~mbertran/cess-ece/publications)." unzip="1" unzipped_size="33720460" size="5396688" checksum="e91ac59ec6e98e3b297e2d2eab83084d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_cat.zip" />
<package id="switchboard" name="Switchboard Corpus Sample" sample="True" license="Permission is granted for use of this material in accordance with the Open Content License [http://opencontent.org/opl.shtml]. This corpus contains transcripts and annotations for 36 calls from the Switchboard Corpus [http://www.ldc.upenn.edu/Catalog/LDC93S7.html]." unzip="1" unzipped_size="2541179" size="791161" checksum="878df010a9f2c2d0a6546a8365f10595" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/switchboard.zip" />
<package id="comparative_sentences" name="Comparative Sentence Dataset" copyright="Copyright (C) 2006 Nitin Jindal and Bing Liu" author="Nitin Jindal and Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="774200" size="279121" checksum="df2d005f455afb760fa37d7f565400f1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comparative_sentences.zip" />
<package id="subjectivity" name="Subjectivity Dataset v1.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2004 Bo Pang and Lillian Lee" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.cornell.edu/People/pabo/people/pabo/movie-review-data" unzip="1" unzipped_size="1303352" size="521628" checksum="a81a44513903ba6bb86f85aeff149561" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/subjectivity.zip" />
<package id="udhr" name="Universal Declaration of Human Rights Corpus" webpage="http://www.un.org/Overview/rights.html" license="public domain" copyright="public domain" unzip="1" unzipped_size="3261577" size="1170177" checksum="745b3a90feb25c95fc805ebbd1ef5258" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr.zip" />
<package id="pl196x" name="Polish language of the XX century sixties" author="I. Kurcz, A. Lewicki, J. Sambor, K. Szafran, J. Woronczak" license="GNU General Public License" webpage="http://www.mimuw.edu.pl/polszczyzna/pl196x/index_en.htm" unzip="1" unzipped_size="58299303" size="7051453" checksum="bcbdcf0fc2420fac238ca17dc7bfe423" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pl196x.zip" />
<package id="paradigms" name="Paradigm Corpus" author="Cathy Bow, University of Melbourne" license="Distributed with the permission of the author" unzip="1" unzipped_size="361186" size="24902" checksum="745ee9036c5ca3226be24c97515f5707" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/paradigms.zip" />
<package id="gazetteers" name="Gazetteer Lists" license="GNU Free Documentation License; or public domain (depending on the file)" unzip="1" unzipped_size="12711" size="8265" checksum="1dd15c714a2be985c482a13d90e9caa4" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gazetteers.zip" />
<package id="timit" name="TIMIT Corpus Sample" sample="True" license="This corpus sample is Copyright 1993 Linguistic Data Consortium, and is distributed under the terms of the Creative Commons Attribution, Non-Commercial, ShareAlike license. http://creativecommons.org/" webpage="http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC93S1" unzip="1" unzipped_size="31932925" size="22251869" checksum="34c047c4749a811287f2c652104d7849" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/timit.zip" />
<package id="treebank" name="Penn Treebank Sample" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="5963497" size="1740034" checksum="78c24a97940c2504d0ad35dd3f8a560b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip" />
<package id="sinica_treebank" name="Sinica Treebank Corpus Sample" webpage="http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm" license="Distributed with the Natural Language Toolkit under the terms of the Creative Commons Attribution-NonCommercial-ShareAlike License [http://creativecommons.org/licenses/by-nc-sa/2.5/]." sample="True" unzip="1" unzipped_size="3293083" size="906706" checksum="979a905010d475a74475064211cd63c8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sinica_treebank.zip" />
<package id="opinion_lexicon" name="Opinion Lexicon" author="Bing Liu" copyright="Copyright (C) 2011 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="67865" size="24947" checksum="43a521f055063e001845b9d484a50173" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/opinion_lexicon.zip" />
<package id="ppattach" name="Prepositional Phrase Attachment Corpus" author="Adwait Ratnaparkhi" webpage="ftp://ftp.cis.upenn.edu/pub/adwait/PPattachData/" copyright="(C) 1994 Adwait Ratnaparkhi" license="Distributed with the permission of the author." unzip="1" unzipped_size="3113650" size="781714" checksum="cce212b7ace8e64722ba2f41f802a5d0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ppattach.zip" />
<package id="dependency_treebank" name="Dependency Parsed Treebank" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995, which has been dependency parsed. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="1069540" size="457429" checksum="631e959acaa42eea718daf04c5cdfa76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip" />
<package id="reuters" name="The Reuters-21578 benchmark corpus, ApteMod version" webpage="http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html" license="The copyright for the text of newswire articles and Reuters annotations in the Reuters-21578 collection resides with Reuters Ltd. Reuters Ltd. and Carnegie Group, Inc. have agreed to allow the free distribution of this data *for research purposes only*. If you publish results based on this data set, please acknowledge its use, refer to the data set by the name 'Reuters-21578, Distribution 1.0', and inform your readers of the current location of the data set." unzip="0" unzipped_size="9073648" size="6378691" checksum="c2acb24d5cccf8035e0fe8d29f440a68" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/reuters.zip" />
<package id="genesis" name="Genesis Corpus" copyright="public domain" license="public domain" unzip="1" unzipped_size="1426122" size="473239" checksum="2a76432753c01fe179684e0ae3a4d023" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/genesis.zip" />
<package id="cess_esp" name="CESS-ESP Treebank" webpage="http://clic.ub.edu/cessece/" license="If you use these corpora for research, please cite thusly: CESS-Cat project (M. Antonia Martí, Mariona Taulé, Lluís Márquez, Manuel Bertran (2007) “CESS-ECE: A Multilingual and Multilevel Annotated Corpus” in http://www.lsi.upc.edu/~mbertran/cess-ece/publications)." unzip="1" unzipped_size="13233272" size="2220392" checksum="684432d4f6384b8f0bd19fee5dc15925" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_esp.zip" />
<package id="conll2007" name="Dependency Treebanks from CoNLL 2007 (Catalan and Basque Subset)" webpage="http://nextens.uvt.nl/depparse-wiki/DataDownload" contact="Kepa Sarasola" copyright="Copyright (C) 2007 The University of the Basque Country" license="Creative Commons Attribution-NonCommercial-NoDerivativeWorks license" unzip="0" unzipped_size="6399295" size="1242958" checksum="b9015928e35c41f0695525289df5208f" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2007.zip" />
<package id="nonbreaking_prefixes" name="Non-Breaking Prefixes (Moses Decoder)" webpage="https://github.com/moses-smt/mosesdecoder/tree/master/scripts/share/nonbreaking_prefixes" license="Gnu LGPL" unzip="1" unzipped_size="43361" size="25437" checksum="5e7d700390745114cd3a52160d6f2eac" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nonbreaking_prefixes.zip" />
<package id="dolch" name="Dolch Word List" webpage="https://en.wikipedia.org/wiki/Dolch_word_list" unzip="1" unzipped_size="1917" size="2116" checksum="6f9c042774b96366c93fd0f9a9adb697" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dolch.zip" />
<package id="smultron" name="SMULTRON Corpus Sample" author="Sofia Gustafson-Capkova, Yvonne Samuelsson, and Martin Volk" webpage="http://www.ling.su.se/DaLi/research/smultron/index.htm" unzip="1" unzipped_size="1677647" size="166207" checksum="8743ff232d76aaf2ff8a10523503a659" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/smultron.zip" />
<package id="alpino" name="Alpino Dutch Treebank" webpage="http://www.let.rug.nl/~vannoord/trees/" contact="Gertjan van Noord" license="Distributed with permission of Gertjan van Noord" unzip="1" unzipped_size="21604821" size="2797255" checksum="ae529a1c5f13d6074f5b0d68d8edb537" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/alpino.zip" />
<package id="wordnet_ic" name="WordNet-InfoContent" version="3.0" webpage="http://wn-similarity.sourceforge.net" unzip="1" unzipped_size="34220359" size="12056682" checksum="25f0185b31693fa11ea898e4feda528c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet_ic.zip" />
<package id="brown" name="Brown Corpus" author="W. N. Francis and H. Kucera" license="May be used for non-commercial purposes." webpage="http://www.hit.uib.no/icame/brown/bcm.html" unzip="1" unzipped_size="10117565" size="3314357" checksum="a0a8630959d3d937873b1265b0a05497" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip" />
<package id="bcp47" name="BCP-47 Language Tags" license="IETF Trust and Unicode Inc." copyright="Copyright (c) 2022 IETF Trust and Copyright (c) 1991-2022 Unicode" webpage="https://www.rfc-editor.org/rfc/rfc5646.html" unzip="0" unzipped_size="1433135" size="222952" checksum="8ef6c0dfa7661e3338dd99c495a7d9b6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/bcp47.zip" />
<package id="panlex_swadesh" name="PanLex Swadesh Corpora" author="Jonathan Pool (editor)" license="CC0 1.0 Universal" webpage="http://panlex.org/" unzip="0" unzipped_size="4418150" size="2861668" checksum="66dd080f09ac17db3d31bb4d667d0794" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/panlex_swadesh.zip" />
<package id="conll2000" name="CONLL 2000 Chunking Corpus" webpage="http://www.cnts.ua.ac.be/conll2000/chunking/" contact="Erik Tjong Kim Sang ([email protected])" unzip="1" unzipped_size="3495903" size="756607" checksum="9529b285edd5fe47271da69df1052301" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2000.zip" />
<package id="universal_treebanks_v20" name="Universal Treebanks Version 2.0" license="Creative Commons Attribution-NonCommercial-ShareAlike 3.0 United States" webpage="https://code.google.com/p/uni-dep-tb/" unzip="0" unzipped_size="119113962" size="25908853" checksum="4acd3991768a727be019a8021fe376d2" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/universal_treebanks_v20.zip" />
<package id="brown_tei" name="Brown Corpus (TEI XML Version)" author="W. N. Francis and H. Kucera" license="May be used for non-commercial purposes." webpage="http://www.hit.uib.no/icame/brown/bcm.html" contact="Lou Burnard -- [email protected]" unzip="1" unzipped_size="56814689" size="8737738" checksum="3c7fe43ebf0a4c7ad3ebb63dab027e09" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown_tei.zip" />
<package id="cmudict" name="The Carnegie Mellon Pronouncing Dictionary (0.6)" webpage="ftp://ftp.cs.cmu.edu/project/speech/dict/" copyright="Copyright 1998 Carnegie Mellon University" license="Use of this dictionary, for any research or commercial purpose, is completely unrestricted. If you use or redistribute this material, we would appreciate acknowlegement of its origin." unzip="1" unzipped_size="3824638" size="896069" checksum="58f743ff818b983b89ef9302b509fc41" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cmudict.zip" />
<package id="omw-1.4" name="Open Multilingual Wordnet" author="Francis Bond" license="Please consult the LICENSE files included with the individual Wordnets. Note that all permit redistribution." copyright="Please consult the copyright statements of the individual Wordnets" webpage="https://omwn.org/" unzip="0" unzipped_size="96786003" size="26634772" checksum="e2acd8d3aa9c7c3dca4d8d4d169a29b8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/omw-1.4.zip" />
<package id="mte_teip5" name="MULTEXT-East 1984 annotated corpus 4.0" author="Erjavec, Tomaž; Barbu, Ana-Maria; Derzhanski, Ivan; Dimitrova, Ludmila; Garabík, Radovan; Ide, Nancy; Kaalep, Heiki-Jaan; Kotsyba, Natalia; Krstev, Cvetana; Oravecz, Csaba; Petkevič, Vladimír; Priest-Dorman, Greg; QasemiZadeh, Behrang; Radziszewski, Adam; Simov, Kiril; Tufiş, Dan and Zdravkova, Katerina" license="Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)" webpage="https://www.clarin.si/repository/xmlui/handle/11356/1043" unzip="1" unzipped_size="122461442" size="14800561" checksum="27aa12b3546cb241df8699506ab15128" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mte_teip5.zip" />
<package id="indian" name="Indian Language POS-Tagged Corpus" author="A Kumaran" license="Distributed with permission" unzip="1" unzipped_size="1091033" size="199187" checksum="599a684793935ecbcf8276133945037c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/indian.zip" />
<package id="conll2002" name="CONLL 2002 Named Entity Recognition Corpus" webpage="http://www.cnts.ua.ac.be/conll2002/ner/" unzip="1" unzipped_size="7785638" size="1867449" checksum="67bb4ca75fa81544d42a159524726e78" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2002.zip" />
<package id="tagsets" name="Help on Tagsets" author="UCREL, Lancaster University" languages="English" unzip="1" unzipped_size="79723" size="34531" checksum="e15834e0dd89b107925af6bb11a8eaa4" subdir="help" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets.zip" />
</packages>
<collections>
<collection id="all-nltk" name="All packages available on nltk_data gh-pages branch">
<item ref="abc" />
<item ref="alpino" />
<item ref="averaged_perceptron_tagger" />
<item ref="averaged_perceptron_tagger_ru" />
<item ref="basque_grammars" />
<item ref="bcp47" />
<item ref="biocreative_ppi" />
<item ref="bllip_wsj_no_aux" />
<item ref="book_grammars" />
<item ref="brown" />
<item ref="brown_tei" />
<item ref="cess_cat" />
<item ref="cess_esp" />
<item ref="chat80" />
<item ref="city_database" />
<item ref="cmudict" />
<item ref="comparative_sentences" />
<item ref="comtrans" />
<item ref="conll2000" />
<item ref="conll2002" />
<item ref="conll2007" />
<item ref="crubadan" />
<item ref="dependency_treebank" />
<item ref="dolch" />
<item ref="europarl_raw" />
<item ref="extended_omw" />
<item ref="floresta" />
<item ref="framenet_v15" />
<item ref="framenet_v17" />
<item ref="gazetteers" />
<item ref="genesis" />
<item ref="gutenberg" />
<item ref="ieer" />
<item ref="inaugural" />
<item ref="indian" />
<item ref="jeita" />
<item ref="kimmo" />
<item ref="knbc" />
<item ref="large_grammars" />
<item ref="lin_thesaurus" />
<item ref="mac_morpho" />
<item ref="machado" />
<item ref="masc_tagged" />
<item ref="maxent_ne_chunker" />
<item ref="maxent_treebank_pos_tagger" />
<item ref="moses_sample" />
<item ref="movie_reviews" />
<item ref="mte_teip5" />
<item ref="mwa_ppdb" />
<item ref="names" />
<item ref="nombank.1.0" />
<item ref="nonbreaking_prefixes" />
<item ref="nps_chat" />
<item ref="omw" />
<item ref="omw-1.4" />
<item ref="opinion_lexicon" />
<item ref="panlex_swadesh" />
<item ref="paradigms" />
<item ref="pe08" />
<item ref="perluniprops" />
<item ref="pil" />
<item ref="pl196x" />
<item ref="porter_test" />
<item ref="ppattach" />
<item ref="problem_reports" />
<item ref="product_reviews_1" />
<item ref="product_reviews_2" />
<item ref="propbank" />
<item ref="pros_cons" />
<item ref="ptb" />
<item ref="punkt" />
<item ref="qc" />
<item ref="reuters" />
<item ref="rslp" />
<item ref="rte" />
<item ref="sample_grammars" />
<item ref="semcor" />
<item ref="senseval" />
<item ref="sentence_polarity" />
<item ref="sentiwordnet" />
<item ref="shakespeare" />
<item ref="sinica_treebank" />
<item ref="smultron" />
<item ref="snowball_data" />
<item ref="spanish_grammars" />
<item ref="state_union" />
<item ref="stopwords" />
<item ref="subjectivity" />
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
<item ref="twitter_samples" />
<item ref="udhr" />
<item ref="udhr2" />
<item ref="unicode_samples" />
<item ref="universal_tagset" />
<item ref="universal_treebanks_v20" />
<item ref="vader_lexicon" />
<item ref="verbnet" />
<item ref="verbnet3" />
<item ref="webtext" />
<item ref="wmt15_eval" />
<item ref="word2vec_sample" />
<item ref="wordnet" />
<item ref="wordnet2021" />
<item ref="wordnet2022" />
<item ref="wordnet31" />
<item ref="wordnet_ic" />
<item ref="words" />
<item ref="ycoe" />
</collection>
<collection id="book" name="Everything used in the NLTK Book">
<item ref="abc" />
<item ref="brown" />
<item ref="chat80" />
<item ref="cmudict" />
<item ref="conll2000" />
<item ref="conll2002" />
<item ref="dependency_treebank" />
<item ref="genesis" />
<item ref="gutenberg" />
<item ref="ieer" />
<item ref="inaugural" />
<item ref="movie_reviews" />
<item ref="nps_chat" />
<item ref="names" />
<item ref="ppattach" />
<item ref="reuters" />
<item ref="senseval" />
<item ref="state_union" />
<item ref="stopwords" />
<item ref="swadesh" />
<item ref="timit" />
<item ref="treebank" />
<item ref="toolbox" />
<item ref="udhr" />
<item ref="udhr2" />
<item ref="unicode_samples" />
<item ref="webtext" />
<item ref="wordnet" />
<item ref="wordnet_ic" />
<item ref="words" />
<item ref="maxent_treebank_pos_tagger" />
<item ref="maxent_ne_chunker" />
<item ref="universal_tagset" />
<item ref="punkt" />
<item ref="book_grammars" />
<item ref="city_database" />
<item ref="tagsets" />
<item ref="panlex_swadesh" />
<item ref="averaged_perceptron_tagger" />
</collection>
<collection id="third-party" name="Third-party data packages">
<item ref="dolch" />
</collection>
<collection id="all" name="All packages">
<item ref="abc" />
<item ref="alpino" />
<item ref="averaged_perceptron_tagger" />
<item ref="averaged_perceptron_tagger_ru" />
<item ref="basque_grammars" />
<item ref="bcp47" />
<item ref="biocreative_ppi" />
<item ref="bllip_wsj_no_aux" />
<item ref="book_grammars" />
<item ref="brown" />
<item ref="brown_tei" />
<item ref="cess_cat" />
<item ref="cess_esp" />
<item ref="chat80" />
<item ref="city_database" />
<item ref="cmudict" />
<item ref="comparative_sentences" />
<item ref="comtrans" />
<item ref="conll2000" />
<item ref="conll2002" />
<item ref="conll2007" />
<item ref="crubadan" />
<item ref="dependency_treebank" />
<item ref="dolch" />
<item ref="europarl_raw" />
<item ref="extended_omw" />
<item ref="floresta" />
<item ref="framenet_v15" />
<item ref="framenet_v17" />
<item ref="gazetteers" />
<item ref="genesis" />
<item ref="gutenberg" />
<item ref="ieer" />
<item ref="inaugural" />
<item ref="indian" />
<item ref="jeita" />
<item ref="kimmo" />
<item ref="knbc" />
<item ref="large_grammars" />
<item ref="lin_thesaurus" />
<item ref="mac_morpho" />
<item ref="machado" />
<item ref="masc_tagged" />
<item ref="maxent_ne_chunker" />
<item ref="maxent_treebank_pos_tagger" />
<item ref="moses_sample" />
<item ref="movie_reviews" />
<item ref="mte_teip5" />
<item ref="mwa_ppdb" />
<item ref="names" />
<item ref="nombank.1.0" />
<item ref="nonbreaking_prefixes" />
<item ref="nps_chat" />
<item ref="omw" />
<item ref="omw-1.4" />
<item ref="opinion_lexicon" />
<item ref="panlex_swadesh" />
<item ref="paradigms" />
<item ref="pe08" />
<item ref="perluniprops" />
<item ref="pil" />
<item ref="pl196x" />
<item ref="porter_test" />
<item ref="ppattach" />
<item ref="problem_reports" />
<item ref="product_reviews_1" />
<item ref="product_reviews_2" />
<item ref="propbank" />
<item ref="pros_cons" />
<item ref="ptb" />
<item ref="punkt" />
<item ref="qc" />
<item ref="reuters" />
<item ref="rslp" />
<item ref="rte" />
<item ref="sample_grammars" />
<item ref="semcor" />
<item ref="senseval" />
<item ref="sentence_polarity" />
<item ref="sentiwordnet" />
<item ref="shakespeare" />
<item ref="sinica_treebank" />
<item ref="smultron" />
<item ref="snowball_data" />
<item ref="spanish_grammars" />
<item ref="state_union" />
<item ref="stopwords" />
<item ref="subjectivity" />
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="tagsets" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
<item ref="twitter_samples" />
<item ref="udhr" />
<item ref="udhr2" />
<item ref="unicode_samples" />
<item ref="universal_tagset" />
<item ref="universal_treebanks_v20" />
<item ref="vader_lexicon" />
<item ref="verbnet" />
<item ref="verbnet3" />
<item ref="webtext" />
<item ref="wmt15_eval" />
<item ref="word2vec_sample" />
<item ref="wordnet" />
<item ref="wordnet2021" />
<item ref="wordnet2022" />
<item ref="wordnet31" />
<item ref="wordnet_ic" />
<item ref="words" />
<item ref="ycoe" />
</collection>
<collection id="tests" name="Packages for running tests">
<item ref="averaged_perceptron_tagger" />
<item ref="porter_test" />
<item ref="twitter_samples" />
<item ref="wmt15_eval" />
<item ref="subjectivity" />
<item ref="framenet_v17" />
<item ref="product_reviews_1" />
<item ref="product_reviews_2" />
<item ref="vader_lexicon" />
<item ref="crubadan" />
<item ref="mte_teip5" />
<item ref="sentence_polarity" />
<item ref="universal_treebanks_v20" />
<item ref="panlex_swadesh" />
<item ref="nonbreaking_prefixes" />
<item ref="perluniprops" />
<item ref="pros_cons" />
<item ref="opinion_lexicon" />
<item ref="comparative_sentences" />
</collection>
<collection id="all-corpora" name="All the corpora">
<item ref="abc" />
<item ref="alpino" />
<item ref="bcp47" />
<item ref="biocreative_ppi" />
<item ref="brown" />
<item ref="brown_tei" />
<item ref="cess_cat" />
<item ref="cess_esp" />
<item ref="chat80" />
<item ref="city_database" />
<item ref="cmudict" />
<item ref="comparative_sentences" />
<item ref="comtrans" />
<item ref="conll2000" />
<item ref="conll2002" />
<item ref="conll2007" />
<item ref="crubadan" />
<item ref="dependency_treebank" />
<item ref="dolch" />
<item ref="europarl_raw" />
<item ref="extended_omw" />
<item ref="floresta" />
<item ref="framenet_v15" />
<item ref="framenet_v17" />
<item ref="gazetteers" />
<item ref="genesis" />
<item ref="gutenberg" />
<item ref="ieer" />
<item ref="inaugural" />
<item ref="indian" />
<item ref="jeita" />
<item ref="kimmo" />
<item ref="knbc" />
<item ref="lin_thesaurus" />
<item ref="mac_morpho" />
<item ref="machado" />
<item ref="masc_tagged" />
<item ref="movie_reviews" />
<item ref="mte_teip5" />
<item ref="names" />
<item ref="nombank.1.0" />
<item ref="nonbreaking_prefixes" />
<item ref="nps_chat" />
<item ref="omw" />
<item ref="omw-1.4" />
<item ref="opinion_lexicon" />
<item ref="panlex_swadesh" />
<item ref="paradigms" />
<item ref="pe08" />
<item ref="pil" />
<item ref="pl196x" />
<item ref="ppattach" />
<item ref="problem_reports" />
<item ref="product_reviews_1" />
<item ref="product_reviews_2" />
<item ref="propbank" />
<item ref="pros_cons" />
<item ref="ptb" />
<item ref="qc" />
<item ref="reuters" />
<item ref="rte" />
<item ref="semcor" />
<item ref="senseval" />
<item ref="sentence_polarity" />
<item ref="sentiwordnet" />
<item ref="shakespeare" />
<item ref="sinica_treebank" />
<item ref="smultron" />
<item ref="state_union" />
<item ref="stopwords" />
<item ref="subjectivity" />
<item ref="swadesh" />
<item ref="switchboard" />
<item ref="timit" />
<item ref="toolbox" />
<item ref="treebank" />
<item ref="twitter_samples" />
<item ref="udhr" />
<item ref="udhr2" />
<item ref="unicode_samples" />
<item ref="universal_treebanks_v20" />
<item ref="verbnet" />
<item ref="verbnet3" />
<item ref="webtext" />
<item ref="wordnet" />
<item ref="wordnet2021" />
<item ref="wordnet2022" />
<item ref="wordnet31" />
<item ref="wordnet_ic" />
<item ref="words" />
<item ref="ycoe" />
</collection>
<collection id="popular" name="Popular packages">
<item ref="cmudict" />
<item ref="gazetteers" />
<item ref="genesis" />
<item ref="gutenberg" />
<item ref="inaugural" />
<item ref="movie_reviews" />
<item ref="names" />
<item ref="shakespeare" />
<item ref="stopwords" />
<item ref="treebank" />
<item ref="twitter_samples" />
<item ref="omw" />
<item ref="omw-1.4" />
<item ref="wordnet" />
<item ref="wordnet2021" />
<item ref="wordnet31" />
<item ref="wordnet_ic" />
<item ref="words" />
<item ref="maxent_ne_chunker" />
<item ref="punkt" />
<item ref="snowball_data" />
<item ref="averaged_perceptron_tagger" />
</collection>
</collections>
</nltk_data>