Commit

slight improvements to documentation
karpathy committed Dec 2, 2011
1 parent 0fb81d6 commit f269372
Showing 7 changed files with 16 additions and 12 deletions.
2 changes: 1 addition & 1 deletion demo1.py
@@ -16,7 +16,7 @@ def demo1():
- Assumes 'pubs_nips' exists and that pdf text is present.
This can be obtained by running
nips_download_parse.py and then nips_add_pdftext.py, or by downloading it
-      from site. See README.txt
+      from site (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
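For reference, the 'pubs_nips' database these demos expect appears to be a pickled list of publication dictionaries (see the repool_util changes below). A minimal sketch of checking for it and loading it, assuming Python 2 and the repository's repool_util module on the path, might look like:

# Sketch only: assumes a local 'pubs_nips' file produced by
# nips_download_parse.py (and nips_add_pdftext.py for the pdf text),
# or downloaded from https://sites.google.com/site/researchpooler/home
import os
from repool_util import loadPubs

if os.path.exists('pubs_nips'):
    pubs = loadPubs('pubs_nips')
    print 'loaded %d publications' % len(pubs)
else:
    print 'pubs_nips not found; run nips_download_parse.py or download it from the site'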
3 changes: 2 additions & 1 deletion demo2.py
@@ -13,7 +13,8 @@ def demo2():
Pre-requisites:
- Assumes 'pubs_nips' exists. This can be obtained by running
-      nips_download_parse.py or by downloading it from site. See README.txt
+      nips_download_parse.py or by downloading it from site.
+      (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
3 changes: 2 additions & 1 deletion demo3.py
@@ -18,7 +18,8 @@ def demo3():
- Assumes 'pubs_nips' exists and contains pdf text inside
(under key 'pdf_text'). This can be obtained by running
nips_download_parse.py and then nips_add_pdftext.py
-      or by downloading it from site. See README.txt
+      or by downloading it from site.
+      (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
5 changes: 3 additions & 2 deletions google_search.py
@@ -1,5 +1,5 @@
"""
- Functions for searching Google and retrieving urls
+ Functions for searching Google and retrieving urls to PDFs
"""

import urllib
@@ -8,7 +8,8 @@
def getPDFURL(pdf_title):
"""
Search google for exact match of the title of this paper
-    and return the url to the pdf file.
+    and return the url to the pdf file, or 'notfound' if no exact match was
+    found.
pdf_title: string, name of the paper.
Returns url to the PDF, or 'notfound' if unsuccessful
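As a rough usage sketch of the clarified contract (the 'notfound' sentinel comes straight from the docstring; the query title below is just an illustrative string, assuming Python 2 as elsewhere in the repository):

# Sketch only: getPDFURL is the function shown above, from google_search.py.
from google_search import getPDFURL

url = getPDFURL('Some Exact Paper Title')  # illustrative title, not from the dataset
if url == 'notfound':
    print 'no exact-match PDF url found via Google'
else:
    print 'PDF url:', url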
2 changes: 1 addition & 1 deletion pdf_read.py
@@ -11,7 +11,7 @@

def convertPDF(pdf_path, codec='ascii'):
"""
-    Takes path to a PDF and returns the text inside it
+    Takes path to a PDF and returns the text inside it as string
pdf_path: string indicating path to a .pdf file. Can also be a URL starting
with 'http'
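A short sketch of the clarified convertPDF contract (local path or 'http' URL in, plain text string out), assuming the repository's pdf_read module is importable and that the file name used here is hypothetical:

# Sketch only: convertPDF is the function shown above, from pdf_read.py.
from pdf_read import convertPDF

text = convertPDF('example_paper.pdf', codec='ascii')  # hypothetical local file
print 'extracted %d characters' % len(text)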
4 changes: 2 additions & 2 deletions repool_analysis.py
@@ -9,11 +9,11 @@ def publicationSimilarityNaive(train_pubs, test_pub):
using a very simple overlap method.
train_pubs: list of publications
-    test_pub: a publication dictionary. Must have key 'pdf_text' with the
+    test_pub: a publication to compare to. Must contain 'pdf_text' key with the
bag of words that occur in that publication
returns list of (scores, one for each of the train_pubs. Returns -1 for
-    any score where a publication does not have the pdf_text available
+    any score where a publication does not have the pdf_text available.
"""

if not test_pub.has_key('pdf_text'):
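To illustrate how a caller could handle the -1 sentinel, a hedged sketch (only the interface in the docstring is assumed; whether a larger score means greater similarity is not stated in this diff, so the ranking below is an assumption):

# Sketch only: interface taken from the docstring above; loadPubs comes from
# repool_util elsewhere in this repository. Assumes higher score = more similar.
from repool_util import loadPubs
from repool_analysis import publicationSimilarityNaive

pubs = loadPubs('pubs_nips')
scores = publicationSimilarityNaive(pubs[1:], pubs[0])

# Skip the -1 scores (publications without 'pdf_text') before ranking.
valid = [(s, i) for i, s in enumerate(scores) if s >= 0]
if valid:
    best_score, best_index = max(valid)
    print 'closest match: train_pubs[%d] (score %s)' % (best_index, best_score)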
9 changes: 5 additions & 4 deletions repool_util.py
@@ -1,20 +1,20 @@
""" Functions: useful general utils """

- import pickle
+ import cPickle
import re
from os import startfile

def savePubs(filename, pubs_to_save):
"""
-    backup a list of publications
+    save a list of publications into a file using Python's pickle
filename: string
pubs_to_save: List of Publication objects
returns nothing
"""

file = open(filename, 'w')
-    pickle.dump(pubs_to_save, file)
+    cPickle.dump(pubs_to_save, file)
file.close()

def loadPubs(filename):
@@ -25,7 +25,7 @@ def loadPubs(filename):
"""

unpicklefile = open(filename, 'r')
-    pubs = pickle.load(unpicklefile)
+    pubs = cPickle.load(unpicklefile)
unpicklefile.close()
return pubs

@@ -44,6 +44,7 @@ def openPDFs(pdf_lst):
def stringToWordDictionary(str):
"""
Takes a string and returns dictionary that stores frequency of every word.
+    Some stop words are removed.
str: string
returns dictionary of word counts for each word. Example: d['hello'] -> 5
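A small round-trip sketch tying the utilities in this file together (cPickle-backed save/load plus the word-count helper); the sample text and filename are purely illustrative, and the exact stop-word list is not shown in this diff:

# Sketch only: savePubs, loadPubs and stringToWordDictionary are the functions
# shown above; the record below is made up for illustration.
from repool_util import savePubs, loadPubs, stringToWordDictionary

pub = {'pdf_text': stringToWordDictionary('deep learning for deep problems')}
savePubs('pubs_demo', [pub])      # cPickle.dump under the hood
restored = loadPubs('pubs_demo')  # cPickle.load
print restored[0]['pdf_text']     # roughly {'deep': 2, 'learning': 1, 'problems': 1}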
