This repository has been archived by the owner on Jul 6, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lecture3.tex
164 lines (139 loc) · 5.76 KB
/
lecture3.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
\chapter{C Intro - Pointers, Arrays, Strings}
\section{Pointers}
Consider memory to be a single huge array, in which each cell stores some value.
\begin{itemize}
\item An \emph{address} refers to a particular memory location, represented by an unsigned number.
\item A \emph{pointer} is a variable that contains the address of a variable.
\item Advantages: Faster \& more efficient to pass a pointer than a block of data; cleaner \& more compact code; low-level access
\item Disadvantages: Extremely buggy (especially with dynamic memory management); memory leaks
\end{itemize}
\subsection{C Pointer Dangers}
\begin{itemize}
\item \underline{WARNING}: Declaring a pointer allocates space to hold the pointer, it does NOT allocate the thing being pointed to.
\item Local variables in C are NOT initialized \(\rightarrow\) may contain garbage
\end{itemize}
\subsection{Pointer Syntax}
\begin{minted}[tabsize=4]{c}
int *p; // p is a pointer to an int (it holds the address of an int)
int x = 3; // x is an int
p = &x; // p now holds the address of x (p points to x)
int y = *p; // y gets the value p points to
*p = 5; // update the memory location p points to (x = 5)
\end{minted}
\begin{itemize}
\item \& = ``reference'' operator (grab the address)
\item * = ``dereference'' operator (follow the pointer)
\end{itemize}
\subsection{Pointers to \texttt{struct}}
\begin{minted}{c}
typedef struct { int x, y; } Point;
Point p; // declare a Point object (fields are not initialized)
Point *paddr = &p; // declare a pointer and make it point to p
/* dot notation */
int h = p.x;
/* arrow notation */
int h = paddr->x;
int h = (*paddr).x;
\end{minted}
\subsection{Casting}
Casting pointers only changes how they are interpreted.
\begin{minted}{c}
void foo(void *v) {
((Point *) v)->x = 10;
}
\end{minted}
\begin{itemize}
\item Treats \texttt{v} as a pointer to a \texttt{Point} struct and sets the value of the \texttt{x} field (of the struct pointed to by \texttt{v}) to 10.
\item If \texttt{v} is a pointer to some other type of data \(\implies\) undefined behavior.
\end{itemize}
\subsection{Pointers to Functions}
\begin{minted}{c}
int (*fn) (void *, void *) = &foo;
\end{minted}
\begin{itemize}
\item \texttt{fn} is a \emph{pointer} to a function that accepts two \texttt{void *} pointers and returns an \texttt{int}; it initially points to the function \texttt{foo}
\item \texttt{(*fn)(x, y)} calls the function
\end{itemize}
\subsection{Pointer Arithmetic}
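Adding an integer to a pointer moves it by whole elements, not bytes: \texttt{p + 1} implicitly adds \texttt{1 * sizeof(type)} to the memory address, and \texttt{p + n} adds \texttt{n * sizeof(type)}.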
\begin{itemize}
\item \texttt{sizeof()} operator: a compile-time operation that returns the number of bytes in an object.
\end{itemize}
\subsection{Pointers as function arguments}
\begin{minted}{c}
void f(int x, int *p) {
x = 5; // does nothing outside
*p = -9; // changes outside!
}
int a = 1, b = -3;
f(a, &b); // now a = 1 but b = -9.
\end{minted}
\section{Arrays}
\begin{itemize}
\item An array name is NOT a variable, but a \emph{pointer} to the 0th element.
\item Maintain a single source of truth: avoid multiple copies of literals by defining array size as a constant.
\begin{minted}{c}
// Bad pattern
int i, ar[10];
for(i = 0; i < 10; i++){ ... }
// Better pattern
const int ARRAY_SIZE = 10;
int i, a[ARRAY_SIZE];
for(i = 0; i < ARRAY_SIZE; i++){ ... }
\end{minted}
\item C doesn't check array bounds: if you read too far you will get garbage.
\end{itemize}
\subsection{Arrays as arguments}
An array is always passed as a \emph{pointer}. You need to pass in the \emph{size} of the array, as \texttt{sizeof} will just evaluate to the size of the pointer representation (e.g.~32/64 bits).
\subsection{Array Name/Pointer Duality}
\texttt{char *pstr} and \texttt{char astr[]} are nearly the same, except \texttt{astr++} is illegal (as though it were constant). The reason we cannot mutate the array name is that it is not a variable but just a name known to the compiler.
\begin{itemize}
\item \texttt{ar[32]}: array variable with 32 elements, works like a pointer
\item \texttt{ar[0]}: same as \texttt{*ar}
\item \texttt{ar[i]}: same as \texttt{*(ar+i)}
\end{itemize}
\subsection{Arrays, Structures, and Pointers}
\begin{minted}{c}
typedef struct bar {
char *a; // A pointer to a character
char b[18]; // A statically sized array of characters
} Bar;
...
Bar *b = (Bar*) malloc(sizeof(struct bar));
b->a = malloc(sizeof(char) * 24);
\end{minted}
\begin{itemize}
\item Requires 24 bytes on a 32-bit architecture: 4 bytes (pointer \texttt{a}) + 18 bytes (18-char array \texttt{b}) + 2 bytes (padding)
\end{itemize}
Examples:
\begin{itemize}
\item \texttt{b->b[5] = 'd'}
\begin{itemize}
\item The \texttt{b} field starts at byte offset 4 (right after the 4-byte pointer \texttt{a})
\item Location written to: byte offset 9 (the 10th byte) of the struct pointed to by \texttt{b}: \\
\texttt{*((char *) b + 4 + 5) = 'd'}
\end{itemize}
\item \texttt{b->a[5] = 'c'}
\begin{itemize}
\item Location written to: the first word pointed to by \texttt{b}, treated as a pointer and advanced by 5: \\ \texttt{*(*((char **) b) + 5) = 'c'}
\end{itemize}
\item \texttt{b->a = b->b}
\begin{itemize}
\item Location written to: the first word pointed to by \texttt{b}: \\
\texttt{*((char **)b) = ((char *) b) + 4}
\end{itemize}
\end{itemize}
\section{C Strings}
A string in C is simply an array of characters, where the last character is followed by a \emph{null terminator} (0 byte).
\subsection{Arguments to \texttt{main}}
\begin{minted}{c}
int main(int argc, char * argv[]);
\end{minted}
\begin{itemize}
\item \texttt{argc} is the \emph{number} of command-line arguments (including the program name)
\item \texttt{argv} points to an array of strings.
\begin{itemize}
\item pointer to an array of pointers!
\item \texttt{argv[0]} is the name of the executable.
\end{itemize}
\end{itemize}