CSE 333
Lecture 6 - data structures
Hal Perkins
Department of Computer Science & Engineering
University of Washington
CSE333 lec 6 C.5 // 06-29-12 // perkins
Administrivia
Exercises:
- ex5 is out: clean up the code from section yesterday, split into
separate files, fix bugs
More towards the end of the hour
- no more exercises due until end of week after HW1
HW:
- HW1 due very soon; get going NOW if you havent yet
CSE333 lec 6 C.5 // 06-29-12 // perkins
Todays topics:
- implementing data structures in C
- multi-file C programs
- brief intro to the C preprocessor
CSE333 lec 6 C.5 // 06-29-12 // perkins
Lets build a simple linked list
Youve seen a linked list in CSE143
- each node in a linked list contains:
some element as its payload
a pointer to the next node in the linked list
- the last node in the list contains a NULL pointer (or some
other indication that it is the last node)
element Z
head
element Y
element X
CSE333 lec 6 C.5 // 06-29-12 // perkins
Linked list node
Lets represent a linked list node with a struct
- and, for now, assume each element is an int
#include <stdio.h>
typedef struct Node {
int element;
struct Node *next;
} Node;
element next
n1
int main(int argc, char **argv) {
Node n1, n2;
n2.element = 2;
n2.next = NULL;
n1.element = 1;
n2.next = &n2;
return 0;
1
element next
n2
NULL
manual_list.c
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
NULL
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
NULL
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
NULL
(POL) head
NULL
(POL) e
(POL) n
???
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
NULL
(POL) head
NULL
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
???
???
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
NULL
(POL) head
NULL
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
???
???
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
NULL
(POL) head
NULL
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
???
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
NULL
(POL) head
NULL
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
NULL
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
NULL
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
NULL
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
NULL
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
???
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
element next
NULL
list = Push(list, 1);
list = Push(list, 2);
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
???
???
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
???
???
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
???
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
(POL) head
(POL) e
(POL) n
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
(main) list
typedef struct Node {
int element;
struct Node *next;
} Node;
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
// crashes if false
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
PushOntoList
push_list.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
a (benign) leak!!
try running with valgrind:
typedef struct Node {
int element;
struct Node *next;
} Node;
bash$ gcc -o push_list -g -Wall
push_list.c
Node *Push(Node *head, int e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
bash$ valgrind --leak-check=full
./push_list
// crashes if false
return n;
}
int main(int argc, char **argv) {
Node *list = NULL;
list = Push(list, 1);
list = Push(list, 2);
return 0;
element next
NULL
element next
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
A generic linked list
Previously, our linked list elements were of type int
- what if we want to let our customer decide the element type?
- idea: let them push a generic pointer -- i.e., a (void *)
typedef struct Node {
void *element;
struct Node *next;
} Node;
element
Node *Push(Node *head, void *e) {
Node *n = (Node *) malloc(sizeof(Node));
assert(n != NULL);
n->element = e;
n->next = head;
}
// crashes if false
next
element
next
NULL
return n;
CSE333 lec 6 C.5 // 06-29-12 // perkins
Using a generic linked list
To use it, customers will need to use type casting
- convert their data type to a (void *) before pushing
- convert from a (void *) back to their data type when accessing
typedef struct Node {
void *element;
struct Node *next;
} Node;
Node *Push(Node *head, void *e);
manual_list_void.c
// assume last slides code
int main(int argc, char **argv) {
char *hello = "Hi there!";
char *goodbye = "Bye bye.";
Node *list = NULL;
list = Push(list, (void *) hello);
list = Push(list, (void *) goodbye);
printf("payload: '%s'\n", (char *) ((list->next)->element) );
return 0;
}
CSE333 lec 6 C.5 // 06-29-12 // perkins
Using a generic linked list
Results in:
(main) goodbye
(main) hello
(main) list
element
. \0
next
element
next
! \0
NULL
CSE333 lec 6 C.5 // 06-29-12 // perkins
Multi-file C programs
Lets create a linked list module
- a module is a self-contained piece of an overall program
has externally visible functions that customers can invoke
has externally visible typedefs, and perhaps global variables, that
customers can use
may have internal functions, typedefs, global variables that
customers should not look at
- the modules interface is its set of public functions, typedefs,
and global variables
CSE333 lec 6 C.5 // 06-29-12 // perkins
Modularity
The degree to which
components of a system
can be separated and
recombined
- loose coupling and
main program
separation of concerns
- modules can be
linked
list
hash
table
developed independently
- modules can be re-used
in different projects
CSE333 lec 6 C.5 // 06-29-12 // perkins
C header files
header: a C file whose only purpose is to be #included
- generally a filename with the .h extension
- holds the variables, types, and function prototype declarations
that make up the interface to a module
the main idea
- every name.c intended to be a module has a name.h
- name.h declares the interface to that module
- other modules that want to use name will #include name.h
and they should assume as little as possible about the
implementation in name.c
CSE333 lec 6 C.5 // 06-29-12 // perkins
C module conventions
Most C projects adhere to the following rules:
- .h files never contain definitions, only declarations
- .c files never contain prototype declarations for functions that
are intended to be exported through the module interface
those function prototype declarations belong in the .h file
- never #include a .c file -- only #include .h files
- any .c file with an associated .h file should be able to be
compiled into a .o file
CSE333 lec 6 C.5 // 06-29-12 // perkins
#include and the C preprocessor
The C preprocessor (cpp) transforms your source code
before the compiler runs
- transforms your original C source code into transformed
Csource code
- processes the directives it finds in your code (#something)
#include ll.h -- replaces with post-processed content of ll.h
#define PI 3.1415 -- defines a symbol, replaces later occurrences
and there are several others well see soon...
- run on your behalf by gcc during compilation
CSE333 lec 6 C.5 // 06-29-12 // perkins
Example
Lets manually run the preprocessor on cpp_example.c:
cpp is the preprocessor
-P suppresses some extra
debugging annotations
#define BAR 2 + FOO
typedef long long int verylong;
cpp_example.h
bash$ cpp -P cpp_example.c out.c
bash$ cat out.c
#define FOO 1
#include "cpp_example.h"
int main(int argc, char **argv)
{
int x = FOO;
// a comment
int y = BAR;
verylong z = FOO + BAR;
return 0;
}
typedef long long int verylong;
int main(int argc, char **argv) {
int x = 1;
int y = 2 + 1;
verylong z = 1 + 2 + 1;
return 0;
}
cpp_example.c
CSE333 lec 6 C.5 // 06-29-12 // perkins
Program that uses a linked list
#include <stdlib.h>
#include <assert.h>
#include "ll.h"
#include "ll.h"
Node *Push(Node *head,
void *element) {
... implementation here ...
}
ll.c
int main(int argc,
char **argv) {
Node *list = NULL;
char *hi = hello;
char *bye = goodbye;
list = Push(list, hi);
list = Push(list, bye);
typedef struct Node {
void *element;
struct Node *next;
} Node;
return 0;
}
Node *Push(Node *head,
void *element);
example_ll_customer.c
ll.h
CSE333 lec 6 C.5 // 06-29-12 // perkins
Compiling the program
Four steps:
- compile example_ll_customer.c into an object file
- compile ll.c into an object file
- link ll.o, example_ll_customer.o into an executable
- test, debug, rinse, repeat
bash$
bash$
bash$
bash$
bash$
gcc -Wall -g -o example_ll_customer.o -c example_ll_customer.c
gcc -Wall -g -o ll.o -c ll.c
gcc -o example_ll_customer -g ll.o example_ll_customer.o
./example_ll_customer
Payload: 'yo!'
Payload: 'goodbye'
Payload: 'hello'
bash$ valgrind --leak-check=full ./example_customer
...etc.
CSE333 lec 6 C.5 // 06-29-12 // perkins
Building systems
This doesnt really scale: gcc -Wall -g -o gadget *.c
- Could take hours (think gcc source, linux kernel, etc.)
If we change a single source file, what needs building?
- foo.c - at least recompile to get foo.o then relink
- foo.h - probably need to recompile foo.c then relink
But also need to recompile everything that #includes foo.h
Directly or indirectly...
Easy to forget something and get a broken build
Solution: let the computer figure it out!
CSE333 lec 6 C.5 // 06-29-12 // perkins
Build dependencies
All build tools work from same core idea: capture build
dependencies and recompile only whats needed
ex: Dependency graph for example_ll_customer
example_ll_customer.c
ll.h
example_ll_customer.o
ll.c
ll.o
example_ll_customer
CSE333 lec 6 C.5 // 06-29-12 // perkins
make
Venerable unix tool to automate build tasks
Input is a Makefile with rules describing dependencies:
target: sources
tab char command(s)
Meaning: if any source is newer (file system timestamp)
than target then run command(s)
- Nothing actually requires command(s) to have anything to do
with sources or target, although that is the typical usage.
Makefiles can do interesting things.
CSE333 lec 6 C.5 // 06-29-12 // perkins
Makefile for ll project
Makefile
# default target (i.e., first one) is the executable program
example_ll_customer: example_ll_customer.o ll.o
gcc -Wall -g -o example_ll_customer example_ll_customer.o ll.o
# targets for individual source files
example_ll_customer.o: example_ll_customer.c ll.h
gcc -Wall -g -c example_ll_customer.c
ll.o: ll.c ll.h
gcc -Wall -g -c ll.c
# clean: remove files built by makefile and emacs backup files
clean:
rm -f example_ll_customer *.o *~
Run make to build the default (first) target
Run make target to build named target
CSE333 lec 6 C.5 // 06-29-12 // perkins
Exercise 1
Extend the linked list program we covered in class:
- add a function that returns the number of elements in a list
- implement a program that builds a list of lists
i.e., it builds a linked list
but each element in the list is a (different) linked list
- bonus: design and implement a Pop function
removes an element from the head of the list
make sure your linked list code, and customers code that uses it,
contains no memory leaks
CSE333 lec 6 C.5 // 06-29-12 // perkins
Exercise 2
Implement and test a binary search tree
- http://en.wikipedia.org/wiki/Binary_search_tree
dont worry about making it balanced
- implement key insert( ) and lookup( ) functions
bonus: implement a key delete( ) function
- implement it as a C module
bst.c, bst.h
- implement test_bst.c
contains main( ), tests out your BST
CSE333 lec 6 C.5 // 06-29-12 // perkins
Exercise 3
Implement a Complex number module
- complex.c, complex.h
- includes a typedef to define a complex number
a + bi, where a and b are doubles
- includes functions to:
add, subtract, multiply, and divide complex numbers
- implement a test driver in test_complex.c
contains main( )
CSE333 lec 6 C.5 // 06-29-12 // perkins
See you on Monday!
CSE333 lec 6 C.5 // 06-29-12 // perkins