Commit

slight improvements to documentation
karpathy committed Dec 2, 2011
1 parent 0fb81d6 commit f269372
Showing 7 changed files with 16 additions and 12 deletions.
2 changes: 1 addition & 1 deletion demo1.py
@@ -16,7 +16,7 @@ def demo1():
- Assumes 'pubs_nips' exists and that pdf text is present.
This can be obtained by running
nips_download_parse.py and then nips_add_pdftext.py, or by downloading it
-      from site. See README.txt
+      from site (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
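For reference, the 'pubs_nips' database these demos expect appears to be a pickled list of publication dictionaries (see the repool_util changes below). A minimal sketch of checking for it and loading it, assuming Python 2 and the repository's repool_util module on the path, might look like:

# Sketch only: assumes a local 'pubs_nips' file produced by
# nips_download_parse.py (and nips_add_pdftext.py for the pdf text),
# or downloaded from https://sites.google.com/site/researchpooler/home
import os
from repool_util import loadPubs

if os.path.exists('pubs_nips'):
    pubs = loadPubs('pubs_nips')
    print 'loaded %d publications' % len(pubs)
else:
    print 'pubs_nips not found; run nips_download_parse.py or download it from the site'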
3 changes: 2 additions & 1 deletion demo2.py
@@ -13,7 +13,8 @@ def demo2():
Pre-requisites:
- Assumes 'pubs_nips' exists. This can be obtained by running
-      nips_download_parse.py or by downloading it from site. See README.txt
+      nips_download_parse.py or by downloading it from site.
+      (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
3 changes: 2 additions & 1 deletion demo3.py
@@ -18,7 +18,8 @@ def demo3():
- Assumes 'pubs_nips' exists and contains pdf text inside
(under key 'pdf_text'). This can be obtained by running
nips_download_parse.py and then nips_add_pdftext.py
-      or by downloading it from site. See README.txt
+      or by downloading it from site.
+      (https://sites.google.com/site/researchpooler/home)
Side-effects:
- will use os call to open a pdf with default program
5 changes: 3 additions & 2 deletions google_search.py
@@ -1,5 +1,5 @@
"""
- Functions for searching Google and retrieving urls
+ Functions for searching Google and retrieving urls to PDFs
"""

import urllib
@@ -8,7 +8,8 @@
def getPDFURL(pdf_title):
"""
Search google for exact match of the title of this paper
-    and return the url to the pdf file.
+    and return the url to the pdf file, or 'notfound' if no exact match was
+    found.
pdf_title: string, name of the paper.
Returns url to the PDF, or 'notfound' if unsuccessful
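As a rough usage sketch of the clarified contract (the 'notfound' sentinel comes straight from the docstring; the query title below is just an illustrative string, assuming Python 2 as elsewhere in the repository):

# Sketch only: getPDFURL is the function shown above, from google_search.py.
from google_search import getPDFURL

url = getPDFURL('Some Exact Paper Title')  # illustrative title, not from the dataset
if url == 'notfound':
    print 'no exact-match PDF url found via Google'
else:
    print 'PDF url:', url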
2 changes: 1 addition & 1 deletion pdf_read.py
@@ -11,7 +11,7 @@

def convertPDF(pdf_path, codec='ascii'):
"""
-    Takes path to a PDF and returns the text inside it
+    Takes path to a PDF and returns the text inside it as string
pdf_path: string indicating path to a .pdf file. Can also be a URL starting
with 'http'
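A short sketch of the clarified convertPDF contract (local path or 'http' URL in, plain text string out), assuming the repository's pdf_read module is importable and that the file name used here is hypothetical:

# Sketch only: convertPDF is the function shown above, from pdf_read.py.
from pdf_read import convertPDF

text = convertPDF('example_paper.pdf', codec='ascii')  # hypothetical local file
print 'extracted %d characters' % len(text)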
4 changes: 2 additions & 2 deletions repool_analysis.py
@@ -9,11 +9,11 @@ def publicationSimilarityNaive(train_pubs, test_pub):
using a very simple overlap method.
train_pubs: list of publications
-    test_pub: a publication dictionary. Must have key 'pdf_text' with the
+    test_pub: a publication to compare to. Must contain 'pdf_text' key with the
bag of words that occur in that publication
returns list of (scores, one for each of the train_pubs. Returns -1 for
-    any score where a publication does not have the pdf_text available
+    any score where a publication does not have the pdf_text available.
"""

if not test_pub.has_key('pdf_text'):
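To illustrate how a caller could handle the -1 sentinel, a hedged sketch (only the interface in the docstring is assumed; whether a larger score means greater similarity is not stated in this diff, so the ranking below is an assumption):

# Sketch only: interface taken from the docstring above; loadPubs comes from
# repool_util elsewhere in this repository. Assumes higher score = more similar.
from repool_util import loadPubs
from repool_analysis import publicationSimilarityNaive

pubs = loadPubs('pubs_nips')
scores = publicationSimilarityNaive(pubs[1:], pubs[0])

# Skip the -1 scores (publications without 'pdf_text') before ranking.
valid = [(s, i) for i, s in enumerate(scores) if s >= 0]
if valid:
    best_score, best_index = max(valid)
    print 'closest match: train_pubs[%d] (score %s)' % (best_index, best_score)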
9 changes: 5 additions & 4 deletions repool_util.py
@@ -1,20 +1,20 @@
""" Functions: useful general utils """

- import pickle
+ import cPickle
import re
from os import startfile

def savePubs(filename, pubs_to_save):
"""
-    backup a list of publications
+    save a list of publications into a file using Python's pickle
filename: string
pubs_to_save: List of Publication objects
returns nothing
"""

file = open(filename, 'w')
-    pickle.dump(pubs_to_save, file)
+    cPickle.dump(pubs_to_save, file)
file.close()

def loadPubs(filename):
@@ -25,7 +25,7 @@ def loadPubs(filename):
"""

unpicklefile = open(filename, 'r')
-    pubs = pickle.load(unpicklefile)
+    pubs = cPickle.load(unpicklefile)
unpicklefile.close()
return pubs

@@ -44,6 +44,7 @@ def openPDFs(pdf_lst):
def stringToWordDictionary(str):
"""
Takes a string and returns dictionary that stores frequency of every word.
+    Some stop words are removed.
str: string
returns dictionary of word counts for each word. Example: d['hello'] -> 5
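A small round-trip sketch tying the utilities in this file together (cPickle-backed save/load plus the word-count helper); the sample text and filename are purely illustrative, and the exact stop-word list is not shown in this diff:

# Sketch only: savePubs, loadPubs and stringToWordDictionary are the functions
# shown above; the record below is made up for illustration.
from repool_util import savePubs, loadPubs, stringToWordDictionary

pub = {'pdf_text': stringToWordDictionary('deep learning for deep problems')}
savePubs('pubs_demo', [pub])      # cPickle.dump under the hood
restored = loadPubs('pubs_demo')  # cPickle.load
print restored[0]['pdf_text']     # roughly {'deep': 2, 'learning': 1, 'problems': 1}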
