Bug: Fix multiprocessing error on Mac OS (#78)
* readme

* add mac os on travis

* try windows

* save

* macos workaround

* done

* done
suchak1 authored Nov 28, 2019
1 parent 82a8288 commit 545562e
Showing 17 changed files with 101 additions and 69 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -4,3 +4,5 @@ venv/*
*.pyc
.ipynb*
torchdata/encoder-5-3000.pkl
test/sampleVideo/test*
test/sampleVideo/SampleVideo*
8 changes: 8 additions & 0 deletions README.md
@@ -27,6 +27,14 @@ To install the necessary packages, simply run:
python -m pip install -r requirements.txt
```

If there is a problem installing `torch`, try this command:

```
python -m pip install torch===1.3.1 torchvision===0.4.2 -f https://download.pytorch.org/whl/torch_stable.html
```

Then, install the rest of requirements as necessary.

### New Packages

To remake the `requirements.txt` file, run:
23 changes: 22 additions & 1 deletion docs/README_iteration2.md
@@ -9,6 +9,14 @@
- Note: Python 3.7 and a Unix-based OS or Windows Subsystem for Linux (WSL) are required.
- Troubleshooting: If you encounter the error *ERROR: Could not install packages due to an EnvironmentError*, try adding `sudo` to the command or installing in a fresh virtual environment.
If there is a problem installing `torch`, try this command:
```
python -m pip install torch===1.3.1 torchvision===0.4.2 -f https://download.pytorch.org/whl/torch_stable.html
```
Then, install the rest of requirements as necessary.
### (2) How to Run Code
- To start the GUI, run:
```
@@ -37,7 +45,8 @@ Note: Make sure you specify the right python version when you make these commands
### (4) Acceptance tests
Note: Multiprocessing does not currently work on Mac OS, only Linux and Windows Subsystem for Linux. This means the GUI will break after pressing start on Mac OS. We are working on a solution. In the meantime, the following works well on Linux (although uses 100% CPU due to multiprocessing).
***Note:*** Multiprocessing/parallelized functions do not currently work on Mac OS, only Linux and Windows Subsystem for Linux. This means the GUI will break after pressing start on Mac OS. Our workaround is simply to disable multiprocessing if we detect Mac OS, and use slow/non-parallel versions of our functions. In the meantime, the following works efficiently on Linux (although uses 100% CPU due to multiprocessing).
To speed up the processes but achieve less accurate results, increase the polling rate.
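The workaround described in the note boils down to a platform check plus a sequential fallback. A minimal, self-contained sketch of that pattern (the hypothetical `double` stands in for the real per-frame classification work; it is not part of the codebase):

```python
import sys
from multiprocessing import Pool

def double(x):
    # Stand-in for the real per-frame work (classify_frame)
    return x * 2

def classify_all(frames, multi=None):
    # Disable multiprocessing on Mac OS, mirroring this commit's workaround
    if multi is None:
        multi = sys.platform != 'darwin'
    if multi:
        # Parallel path (Linux / WSL)
        with Pool() as pool:
            return pool.map(double, frames)
    # Slow, sequential path (Mac OS)
    return [double(f) for f in frames]

print(classify_all([1, 2, 3], multi=False))  # → [2, 4, 6]
```

Both branches return results in the same shape, so callers never need to know which path ran.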
Here are 3 acceptance tests:
@@ -115,3 +124,15 @@ First, run `python src/start.py` to start GUI. Then, choose the test video in `t
### (8) Notes for TA
- Our prototype works in a limited capacity, restricted only by the accuracy of the API and ML models. For example, when searching the sample nature video (test/sampleVideo/SampleVideoNature.mp4), the only labels found by the classifier are ``{'ant', 'nematode', 'goldfish', 'leafhopper', 'lacewing', 'vine_snake', 'green_snake', 'common_newt', 'green_mamba', 'snail', 'eft', 'cucumber', 'slug', 'lycaenid', 'bell_pepper', 'wine_bottle', 'spider_web', 'Granny_Smith'}``. The problem here is that a user would be unlikely to search the video for terms like "bell pepper" or "lacewing", and would receive no clips if the search terms were "water" or "leaf". We have several rough ideas to work around this, e.g. query parent child relationships directly through API or utilize semantic similarity functionality to fetch words in the imagenet categories. As is, our implementation does what we intended from our proposal, however we'd ideally like to use the time before Milestone 5 to explore these rough ideas and potentially improve the program for the presentation.
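One of the rough ideas above (querying parent/child label relationships) can be sketched with a hand-built lookup table; the `PARENT_TERMS` mapping below is purely illustrative and not part of the codebase or any API:

```python
# Hypothetical parent-term table: maps narrow ImageNet-style labels to
# broader terms a user is more likely to search for.
PARENT_TERMS = {
    'goldfish': {'fish', 'water', 'animal'},
    'bell_pepper': {'vegetable', 'food', 'plant'},
    'lacewing': {'insect', 'animal'},
}

def matches(search_term, label):
    # A search term matches a label directly or via its broader parents
    return search_term == label or search_term in PARENT_TERMS.get(label, set())

print(matches('water', 'goldfish'))     # → True
print(matches('water', 'bell_pepper'))  # → False
```

A real version would populate the table from WordNet-style hypernyms or a semantic-similarity model rather than by hand.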
The following are additions left to complete before the Dec deadline.
***TODO:***
1. displaying captions for outputted clips - I believe this is in progress by Michael or Jeremy?
2. silence console warnings and display status bar for at least `classify_frames` and maybe `get_frames`
- Right now, everything runs on one thread except for multiprocessing. We should spawn a new thread for Job, so that the GUI is still responsive after pressing start.
3. switch GUI from Tk to ttk and use modern theme - aesthetic change
4. parallelization / multiprocessing for `get_frames` - should be straightforward following the example of `classify_frames`
- will give next best speed boost after `classify_frames`
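The responsiveness note in TODO item 2 amounts to running the Job off the main thread. A minimal sketch, assuming a callable job and a completion callback (`run_job_async` is a hypothetical helper; in real Tk code the callback should be marshalled back to the GUI thread via `widget.after`):

```python
import threading

def run_job_async(job_fn, on_done):
    # Run job_fn on a worker thread so the Tk mainloop stays responsive;
    # hand the result to on_done when finished.
    def worker():
        on_done(job_fn())
    t = threading.Thread(target=worker, daemon=True)
    t.start()
    return t

results = []
t = run_job_async(lambda: sum(range(5)), results.append)
t.join()
print(results)  # → [10]
```

The daemon flag keeps a stuck job from blocking interpreter shutdown when the user closes the window.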
11 changes: 8 additions & 3 deletions src/model.py
@@ -34,6 +34,8 @@ def __init__(self, settings):
else:
self.video_path = None
self.settings = None
# disable multiprocessing on mac os
self.multi = sys.platform != 'darwin'


def do_the_job(self):
@@ -82,9 +84,12 @@ def classify_frame(self, frame):
def classify_frames(self):
frames = self.get_frames()

# multiprocessing
with Pool() as pool:
results = pool.map(self.classify_frame, frames)
if self.multi:
# multiprocessing
with Pool() as pool:
results = pool.map(self.classify_frame, frames)
else:
results = [(t, self.score(Worker().classify_img(f)) / 100) for (f, t) in frames]

return list(sorted(results, key=lambda x: x[0]))

37 changes: 6 additions & 31 deletions src/view.py
@@ -1,4 +1,4 @@
from model import Job,Seer
from model import Job, Seer
from tkinter import *
from tkinter.filedialog import askopenfilename
from PIL import Image
@@ -32,32 +32,7 @@ def get_settings(self):
return {"video": self.video_path, "settings": self.settings}

def set_settings(self, values, path):

#Sets the settings of the GUI and includes the video path file.

#values is a dictionary in the following format:
# 'conf': float,
# 'poll': int,
# 'anti': int,
# 'search': list of strings
# }
# 'conf': confidence interval for image classification. Must be a value between 0 and 1
# 'poll': the framerate poll (frequency of frames to classify). Must be a value >= 0
# 'anti': the treshold for how long a clip can contain frames not containing the search question
# (anything longer will be the bounds of the clip). Must be a value >= 0
# 'search': a list of search terms to use. Must contain at least one string.

# path is the video_input path

#if not os.path.exists(path):
# print('Here1')
#self.set_default_settings()
# return False
#print(len(values['search']))
#print(values['search'][0])
#print(path)
#print(type(path))

# Sets the settings of the GUI and includes the video path file.
expected_keys = ['conf', 'poll', 'anti', 'runtime', 'search']
missing = [x for x in expected_keys if x not in values.keys()]
if len(missing) > 0:
@@ -263,16 +238,16 @@ def display_settings():
kill_button.grid(column=0, row=100)

'''
You would need to set up a way to select output clips and then hit a button which produces
a caption for it. The process would be: get the path of the clip, extract a frame from the
middle of the clip, call the tell_us_oh_wise_one(frame) method from the Seer object which
You would need to set up a way to select output clips and then hit a button which produces
a caption for it. The process would be: get the path of the clip, extract a frame from the
middle of the clip, call the tell_us_oh_wise_one(frame) method from the Seer object which
should be an attribute of GUI, and take the string it returns and print it to the GUI somewhere,
'''

temp_lbl4 = Label(win_content, text="", wraplength="200px", justify=CENTER)
temp_lbl4.grid(column=3, row=100, columnspan=4)
temp_lbl4.grid_remove()

def get_caption():
filename = askopenfilename()
caption_videopath = str(filename)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed test/sampleVideo/testEnd.mp4
Binary file not shown.
Binary file removed test/sampleVideo/testFull.mp4
Binary file not shown.
Binary file removed test/sampleVideo/testMid.mp4
Binary file not shown.
Binary file removed test/sampleVideo/testStart.mp4
Binary file not shown.
89 changes: 55 additions & 34 deletions test/test_controller.py
@@ -18,30 +18,50 @@ def test_constructor():
check.is_true("make_clip" in dir(w))


image_names = [
'banana',
'basketball',
'carton',
'cucumber',
'fountain',
'golden retriever',
'goldfish',
'passenger car',
'pop bottle',
'seashore',
'space shuttle',
'sports car',
'suit',
'tabby',
'volcano'
]

related = [
'plantain',
'nba',
'box',
'zucchini',
'spring',
'dog',
'carp',
'coach',
'soda',
'beach',
'spaceship',
'roadster',
'tuxedo',
'cat',
'lava'
]

def test_classify_img():
image_dir = '/sampleImage/'
# Changes to tests:
# we changed the images classified due to the limitations of our ML model
# since the model is only trained on 1000 object categories
# we will create a helper function in the next iteration
# to test for semantic similarity and get better search results
image_names = [
'banana',
'basketball',
'carton',
'cucumber',
'fountain',
'golden retriever',
'goldfish',
'passenger car',
'pop bottle',
'seashore',
'space shuttle',
'sports car',
'suit',
'tabby',
'volcano'
]

# Changes to tests:
# wrong_names is a rotation of original image_names
# as it is unlikely that basketball
@@ -62,33 +82,34 @@ def test_classify_img():

for idx, name in enumerate(image_names):
img = Image.open(test_folder + image_dir + name + img_ext)
classifications = w.classify_img(img)
# should all be true
# (that 'banana' is in classification dict for 'banana.jpg' and so on)
check.is_in(name, w.classify_img(img))
check.is_in(name, classifications)

# now let's try assertions that should definitely be wrong
# (that 'volcano' is in the classification dict for 'banana.jpg')
check.is_not_in(wrong_names[idx], w.classify_img(img))
check.is_not_in(wrong_names[idx], classifications)


def test_get_related_words():
w = Worker()

originals = [name.replace(' ', '_') for name in image_names]
wrong_names = originals[1:] + originals[:1]

check.equal({}, w.get_related_words(''))
check.is_in('plantain', w.get_related_words("banana"))
check.is_in('nba', w.get_related_words("basketball"))
check.is_in('box', w.get_related_words("carton"))
check.is_in('zucchini', w.get_related_words("cucumber"))
check.is_in('spring', w.get_related_words("fountain"))
check.is_in('dog', w.get_related_words("golden_retriever"))
check.is_in('carp', w.get_related_words("goldfish"))
check.is_in('coach', w.get_related_words("passenger_car"))
check.is_in('soda', w.get_related_words("pop_bottle"))
check.is_in('beach', w.get_related_words("seashore"))
check.is_in('spaceship', w.get_related_words("space_shuttle"))
check.is_in('roadster', w.get_related_words("sports_car"))
check.is_in('tuxedo', w.get_related_words("suit"))
check.is_in('cat', w.get_related_words("tabby"))
check.is_in('lava', w.get_related_words("volcano"))

for idx, name in enumerate(originals):
related_set = w.get_related_words(name)
# should all be true
# (that 'plantain' is in related words set for 'banana' and so on)

check.is_in(related[idx], related_set)

# now let's try assertions that should definitely be wrong
# (that 'lava' is in the related words set for 'banana')
check.is_not_in(wrong_names[idx], related_set)


def test_make_clip_negative_time():
