-
Notifications
You must be signed in to change notification settings - Fork 0
/
lesson06.py
168 lines (136 loc) · 7.16 KB
/
lesson06.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Welcome to the sixth lesson in the Yesselman Group's Python series
# Topics covered: sets, review of strings
########################################################################################################################
# Part I: sets
# In programming, a container object is something that holds other types of data and usually has no intrinsic properties
# itself. Previously we have covered lists, but today we will be talking about sets, which are a type of associative
# container, which just means that element storage is dependent on the element's identity. That may sound confusing,
# but all it means is that a set stores a unique collection of objects. Consider the example below and call
# show_container_contents on both the list and the set below. What are the outputs?
# Build a 300-element list out of three repeated values, then make a set from the same data.
my_nums = [10] * 100
my_nums.extend([20] * 100)
my_nums.extend([30] * 100)
num_set = set(my_nums)
def show_container_contents(input_container):
    """Print every element of ``input_container``, one element per line.

    Works for any iterable container (list, set, ...). Nothing is returned.
    """
    remaining = iter(input_container)
    for item in remaining:
        print(item)
# When you want to declare a set, the easiest way is a literal declaration with braces. Below are some examples:
my_set = {1, 2, 3, 4, 5}
string_set = {"A", "B", "C"}
# Unlike lists, which use the .append() method, sets use .add() to add an element, as follows:
string_set.add("D")
# Additionally, there are .clear() and .remove() methods, as with lists. See the example below that clears a set:
def clear_set(input_set):
    """Empty ``input_set`` in place and print its size before and after.

    The set passed in is mutated: all of its elements are removed with
    ``.clear()``. Nothing is returned.
    """
    size_before = len(input_set)
    print("Size before clearing {}".format(size_before))
    input_set.clear()
    size_after = len(input_set)
    print("Size after clearing {}".format(size_after))
# Sets are very useful for analyzing large data sets as you can easily see all types of an entry. Below are some
# examples of using sets:
# Sample test scores; several values appear more than once.
class_scores = [
    90, 93, 95, 80, 94,
    90, 49, 93, 87, 95,
]
def get_unique_scores(score_list):
    """Report and return the distinct scores found in ``score_list``.

    Prints how many unique scores there are and then hands the unique scores
    back to the caller, so the function actually "gets" them as its name says.

    Args:
        score_list: iterable of hashable scores; duplicates are allowed.

    Returns:
        set: the distinct scores from ``score_list``.
    """
    # Building a set drops every duplicate score
    score_set = set(score_list)
    print("There are {} unique scores in this list".format(len(score_set)))
    return score_set
def is_lucky_number(input_number):
    """Print whether ``input_number`` is one of the lucky numbers.

    The lucky numbers are kept in a set, so the membership test with ``in``
    is a direct lookup. Nothing is returned; the verdict is only printed.
    """
    lucky_numbers = {1, 3, 7, 9, 13, 15}
    # Guard clause: handle the "not lucky" case first and leave early
    if input_number not in lucky_numbers:
        print("{} is not a lucky number".format(input_number))
        return
    print("{} is a lucky number".format(input_number))
########################################################################################################################
# Part II: Adding more to strings
# String manipulation is an extremely important part of programming and data analysis, as information is often output as
# a string by other applications. The first method to cover is called .split(). In the parentheses, you put a delimiter
# (whitespace is used by default), and a list of the broken-up strings is produced. Consider the following code:
raw_data = "1 2 3 4 5"
strings = raw_data.split()
# for tk in strings:
#     print(tk)
# What does the code above do? Building on that, the next example splits some comma-separated data by passing
# "," as the delimiter:
comma_separated = "10,9,8,7,6,5,4,3,2,1"
data_tokens = comma_separated.split(",")
# print(len(data_tokens))
# .join() is the reverse of .split(). It is called as DELIMITER.join(STRING_LIST) and glues the strings in the list
# together with the delimiter between them. Consider the example below:
my_strings = [
    "first",
    "second",
    "third",
    "fourth",
]
connected = ",".join(my_strings)
# print(connected)
# To check whether a string contains a substring, use .find(). It takes the substring as an argument and returns the
# index at which that substring begins, or -1 if the string does not contain the substring. Below are some examples:
big_string = "first_second_third"
# print(big_string.find("second"))
# print(big_string.find("fourth"))
def is_rna_strand(input_sequence):
    """Tell whether ``input_sequence`` is RNA, i.e. whether it contains a "U".

    Returns True if an uppercase "U" occurs anywhere in the sequence and
    False otherwise. The check is case-sensitive.
    """
    # .find() gives the index of the first "U", or -1 when there is none
    first_u_index = input_sequence.find("U")
    return first_u_index >= 0
# Lastly, you can change the case of a string with .upper() and .lower(). This can be helpful to avoid annoying case-related issues
def name_in_class(input_name, input_class):
    """Check, ignoring case, whether a student's name appears in a class.

    Each entry of ``input_class`` is lower-cased and searched for the
    lower-cased ``input_name`` as a substring. Returns True as soon as one
    entry contains it, and False if no entry does (including an empty class).
    """
    # Lower-case the name once so the comparison is case-insensitive
    needle = input_name.lower()
    # any() stops at the first matching entry, like an early return in a loop
    return any(member.lower().find(needle) != -1 for member in input_class)
# Miscellaneous examples:
# Below are some miscellaneous examples of how to use sets and some of these more advanced string methods()
def check_unique_scores(score_list):
    """Return True if every score in ``score_list`` is distinct, False if any score repeats.

    A set drops duplicates, so the list holds only unique scores exactly when
    the set built from it has the same number of elements.
    """
    total_scores = len(score_list)
    distinct_scores = len(set(score_list))
    return distinct_scores == total_scores
def summarize_names(name_list):
    """Print how often each distinct name occurs, followed by overall totals.

    One line is printed per distinct name with its number of occurrences,
    then a final line with the number of distinct names and the total number
    of entries. On Python 3.7+ the names are listed in the order they first
    appear in ``name_list``.

    Args:
        name_list: list of hashable names; duplicates are allowed.

    Returns:
        None. The summary is only printed.
    """
    # Imported locally so the function is self-contained
    from collections import Counter

    # Tally everything in a single pass instead of calling name_list.count()
    # once per distinct name, which rescans the whole list each time.
    name_counts = Counter(name_list)
    for name, occurrences in name_counts.items():
        print("Name: {}\tOccurences: {}".format(name, occurrences))
    print("Total names: {}\tTotal People: {}".format(len(name_counts), len(name_list)))
def string_to_values(input_string):
    """Convert a string of '|'-delimited values into a list of floats.

    Args:
        input_string: text such as ``"1|2.5|-3"``.

    Returns:
        list: one float per '|'-separated token. An empty or all-whitespace
        string yields an empty list, so the empty string produced by joining
        zero values converts back cleanly.

    Raises:
        ValueError: if any token cannot be parsed as a float (for example an
        empty token caused by a doubled or trailing '|').
    """
    # "".split('|') gives [''], and float('') would raise, so treat blank
    # input as "no values" up front.
    if not input_string.strip():
        return []
    return [float(token) for token in input_string.split('|')]
def values_to_string(input_values):
    """Convert a list of values into a single '|'-delimited string.

    Every value is turned into text with ``str()`` and the pieces are joined
    with '|' between them. An empty input produces an empty string.
    """
    return '|'.join(map(str, input_values))
########################################################################################################################
# Homework
# (T/F) For a list L, len(L) < len(set(L))
# (T/F) BASE.find(SUBSTRING) returns the index at which SUBSTRING begins in BASE or -1 if SUBSTRING is not in BASE
# (T/F) "1 2 3 4 5".split() returns a list of integers
# What is the main advantage of using a set over a list?
# How do you add an item to a set and then check if that item is in the set? How is this different than for strings?
# Do .lower() and .upper() impact non-alphabetic characters? If so, how?
# For a list with N strings, when join is called, how many times is the joining string added to the output string?
# Bug Busters.
# Jim is trying to find the max number from the following string but is not getting the right answer. While he wants
# 500, the supplied answer is 4. What is he doing wrong?
# NOTE: exercise code - intentionally left exactly as written; finding the bug is the point of the question above.
# The result of max() is not assigned or printed here.
numbers = "01 2 3 4 00500 -1".split()
max(numbers)
def item_count(input_list,input_item):
    """Homework placeholder, to be implemented by the student (see the comment that follows).

    As written the body is only ``pass``, so calling it does nothing and returns None.
    """
    pass
# create a function that tells how many times 'input_item' is found in 'input_list'.
def long_A_stretch(input_sequence):
    """Homework placeholder, to be implemented by the student (see the comment that follows).

    As written the body is only ``pass``, so calling it does nothing and returns None.
    """
    pass
# create a function that can tell if there is a continuous stretch of 20+ A's in the supplied sequence
def parse_values(value_string):
    """Homework placeholder, to be implemented by the student (see the comment that follows).

    As written the body is only ``pass``, so calling it does nothing and returns None.
    """
    pass
# create a function that takes a string as follows "1|2|3|4|5", and returns a list of floats. Additionally, if a value is
# negative, set it to 0
def thresholding_entries(entry_list,threshold):
    """Homework placeholder, to be implemented by the student (see the comment that follows).

    As written the body is only ``pass``, so calling it does nothing and returns None.
    """
    pass
# create a function that returns a list of entries whose occurrence >= threshold. I.e., for input list [1,2,3,4,4,4] and
# threshold = 2, the answer would be [4].