180class7

Please send questions to st10@humboldt.edu .
-----
-- CIS 180 - Intro to SQL
-- Class 7 - 3-24-03
-----

-- last modified: 3-24-03, post-class

spool 180class7_results.txt

-- setting up a clean set of empl, dept, customer tables

@ set_up_ex_tbls

-----
-- AGGREGATE FUNCTIONS - min, max, sum, avg, count
-----

-- can use these functions to compute a SINGLE value based
-- on ALL of the values from selected rows in a particular
-- column...

-- IMPORTANT TO NOTE: when you use one of these, you
-- are getting a SINGLE row as a result --- don't try to project
-- any OTHER columns along with them!!

-- what if you want to know the average employee salary?

select	avg(salary)
from	empl; 

-- what if you want to know the average salary of
-- Managers only?
-- (remember: selection in WHERE clause happens BEFORE
-- the computations and projections in the SELECT clause!)

select	avg(salary)
from	empl
where	job_title = 'Manager';

-- you CAN have as many computations as you want...

select	avg(salary), avg(commission), min(hiredate),
        max(commission)
from	empl
where	job_title = 'Salesman';

-- I CAN rename these columns to have "prettier" names...

select  avg(salary) "Avg Salary", avg(commission) "Avg Comm", 
        min(hiredate) "Earliest Hire", max(commission) "Highest Comm"
from    empl
where   job_title = 'Salesman';

-- but, you CANNOT do this:

select	empl_last_name, avg(salary)
from	empl;

-- count() tells you how many NON-NULL values are
-- in a column (after any selection, of course)

select	count(mgr), count(commission)
from	empl;

-- shortcut: count(*) simply counts the number of
-- rows in the result...

select	count(*), count(mgr), count(commission)
from	empl;

-- note how avg() indeed only averages the non-null
-- values below:

select	count(*), count(commission), sum(commission), avg(commission)
from    empl;


-- (AND: odd additional use of DISTINCT! Can put it
-- INSIDE the parentheses of an aggregate function
-- to filter out duplicates in that column before
-- compute...!)

select	count(job_title), count(distinct job_title)
from	empl;

-- does it work for others besides count()? It does?!

select sum(salary), sum(distinct salary)
from   empl;

-- Once you have these --- they are quite handy
-- in nesting, BUT you've got to remember that
-- they NEED to be within a SELECT...

-- which employee(s) are paid the least?

select	empl_last_name, salary
from	empl
where	salary =
	(select	min(salary)
	 from	empl);

-- is = evil, above? NO --- aggregate function ONLY
-- returns a SINGLE value, so OK!!

-- DON'T try it without nested select!

prompt FAILS: cannot call aggregate function within
prompt a WHERE condition directly...
select	empl_last_name, salary
from	empl
where	salary = min(salary);

-- which Managers make more than the average salary
-- for a Manager?

select	empl_last_name
from	empl
where	job_title = 'Manager'
and	salary >
	(select	avg(salary)
	 from	empl
	 where  job_title = 'Manager');

-- are there any Clerks who make more than a Salesman?

select  empl_last_name, salary
from    empl
where   job_title = 'Clerk'
and     salary >
        (select min(salary)
         from   empl
         where  job_title = 'Salesman');

-- sure, other tables can be involved...
-- which employees make less than the average employee in
-- New York?

select  empl_last_name, salary
from    empl
where   salary <
        (select avg(salary)
         from   empl e, dept d
         where  e.dept_num = d.dept_num
         and dept_loc = 'New York');

-- you can use these within sub-selects inside of
-- updates and deletes, too...

-- reset Salesmen to have the average salary of Analysts...

update empl
set    salary = (select avg(salary)
                 from	empl
	         where  job_title = 'Analyst')
where  job_title = 'Salesman';

select *
from   empl;

-----
-- GROUP BY clause - let you do computations using
-- aggregate functions for "groups" within your selected
-- rows. 
-----

-- if say:
--
-- group by col1
--
-- .. you want the rows "grouped" by COMMON values of
-- col1, and computations done on those groups!

-- IMPORTANT LIMITATION: ONCE you use group-by, you can
-- ONLY project the column(s) you've grouped by, and
-- aggregate computations! THAT'S IT!!!

-- want the average salary, latest hiredate, and employee
-- count, per department? 

select	 dept_num, avg(salary), max(hiredate), count(*)
from	 empl
group by dept_num;

-- want the average salary, latest hiredate, and employee
-- count by job_title?

select	 job_title, avg(salary), max(hiredate), count(*)
from	 empl
group by job_title;

-- CANNOT do this...

prompt FAILS --- can only project what you've grouped by!
select 	 dept_name, avg(salary)
from	 dept d, empl e
where	 e.dept_num = d.dept_num
group by d.dept_num;

-- if group by MULTIPLE columns --- groups on rows where
-- ALL of the values in those columns is the same

select 	 dept_name, avg(salary)
from	 dept d, empl e
where	 e.dept_num = d.dept_num
group by d.dept_num, dept_name;

-- but be careful...is this what you expect?

select	 job_title, mgr, avg(salary), count(*)
from	 empl
group by job_title, mgr;

-- OH --- and although you CANNOT put an aggregate function
-- in a WHERE clause --- you CAN put it in an ORDER-BY!
-- (although this only makes sense with group-by ---
-- do you understand why?)

select	 job_title, mgr, avg(salary), count(*)
from	 empl
group by job_title, mgr
order by count(*) desc;

select	 job_title, mgr, avg(salary), count(*)
from	 empl
group by job_title, mgr
order by avg(salary);

-----
-- HAVING
-----

-- HAVING is to groups what WHERE is to rows...
--
-- it is how to say, I only want to see SOME of the
-- resulting groups I've grouped-by!

-- what if I only want to see departments with at
-- least 4 employees?

select   dept_name, avg(salary), count(*)
from     dept d, empl e
where    e.dept_num = d.dept_num
group by d.dept_num, dept_name
having	 count(*) >= 4;   

-- this is allowed, too...
-- (yes, I can determine groups based on count(*)
-- even though I'm NOT projecting it..

select   dept_name, avg(salary)
from     dept d, empl e
where    e.dept_num = d.dept_num
group by d.dept_num, dept_name
having	 count(*) >= 4;   
			  
-- and of course, these can all be combined in interesting
-- ways...

-- what is the average number of employees per department?

select	 avg(count(*))
from	 empl
group by dept_num;

-- what departments have less than the average number
-- of employees per department?

select   dept_name, count(*)
from	 dept d, empl e
where	 e.dept_num = d.dept_num
group by d.dept_num, dept_name
having   count(*) <
	(select   avg(count(*))
	 from     empl
	 group by dept_num);

-- a few more EXISTS-related examples...

-- what movies are available on videos with format Beta?

select	movie_title
from	movie m
where   exists
        (select   'a'
         from     video v
         where    v.movie_num = m.movie_num
         and      vid_format = 'Beta');

-- what are movie titles for movies for which *no* rental
-- of *any* video for that movie has taken place?

select	movie_title
from	movie m
where   not exists
        (select 'a'
         from   video v
         where  v.movie_num = m.movie_num
         and    exists
	        (select 'a'
                 from   rental r
		 where  v.vid_id = r.vid_id));
                 

-- stop writing results to file 180class7_results.txt
spool off