315lab06-projected

Please send questions to st10@humboldt.edu .
-------
-- 315lab06-projected.sql
-- example SQL and SQL*Plus commands for CIS 315 lab, Week 6
--
-- last modified: 09-30-10 (post-lab)
-------

-- NOTE: this script uses the empl and dept tables from set-up-ex-tbls.sql;
--    It also has an example using the HW "video store" tables.
--    I am not re-running those table set-up scripts here, but these 
--    tables need to exist and be populated for this script to run!

-- let's spool the results into a file

spool lab06-results.txt    

------------
-- TWO SQL*Plus DEBUGGING-HELPER commands...!
------------

--------
-- prompt simply prints a message to the screen (or to your spooled
--    results, say before a query)
--    (prompt with no message? You get a blank line to the screen...)

prompt
prompt Howdy there CIS 315
prompt

--------
-- set echo on can be used so that you see the query,
--    and then its result, even for queries run from a SQL script

set echo on

select *
from   empl;

--------
-- NOTE: PLEASE set echo OFF again after debugging, typically;
--    it can make for LONG spooled results files... 
--------

set echo off

-- LAB EXERCISE PART 1 -
-- start a SQL script, lab6.sql or some such,
-- start it with comments including your name, Week 6 Lab, and today's date
-- put in spool and spool off commands to spool results into a file
--    such as lab6-results.txt

-- then, between the spool and spool off, put a prompt command that
--    prints some message of your choice to the screen







 
--*****************************************
-- nested select/subselect examples...
--*****************************************

-- you can actually have a where clause (or a from clause!) containing
--    another select statement --- a select NESTED within another,
--    a SUBselect inside of another;

-- who is/are the manager(s) of the highest-paid clerk (or clerks)?

-- first: working backwards...
-- what is the salary of the highest paid clerk?

select max(salary)
from   empl
where  job_title = 'Clerk';

-- next: what's is (or are) the employee numbers of that/those
-- clerks?

select empl_num
from   empl
where  job_title = 'Clerk'
       and    salary =  
              -- this subselect COMPUTES the desired salary to compare 
              --    salary to!
	      (select	max(salary)
	       from	empl
               where	job_title = 'Clerk');

-- MAKE SURE YOU UNDERSTAND: those rows are selected from the "outer" query
--    in which job_title is 'Clerk' AND the salary is equal to that 
--    maximum salary that is the result of the "inner", nested query

-- you can project whatever you want at the "top" level, of course;
-- showing that maximum clerk's last name and salary, too:

select  empl_last_name, salary
from    empl 
where   job_title = 'Clerk' 
        and salary = 
              (select max(salary) 
               from   empl 
               where  job_title = 'Clerk'); 

-- lab exercise part 2 -
-- in your lab6.sql script, write a query to project the last name
--    and hiredate of the lowest-paid Manager

select empl_last_name, hiredate
from   empl 
where  job_title = 'Manager'
       and salary = (select min(salary)
                     from empl
                     where job_title = 'Manager');

--------
-- there can be more than one table involved in nested selects...!
--------

-- more than one table can be involved in nested selects (although in such 
--    cases, the columns relating those tables will often also be 
--    involved);

-- consider: to find the department's whose location is Dallas:

select   dept_num
from	 dept
where    dept_loc = 'Dallas';

-- ...and now, to find the names and salaries of employees who work in Dallas:

select   empl_last_name, salary
from	 empl
where    dept_num in
	 (select dept_num
	  from	 dept
	  where  dept_loc = 'Dallas');

--------
-- COMMON ERROR: aggregate functions ONLY work in a query -- they
--    don't make sense "on their own". The following WON'T WORK:

-- BAD QUERY! NO BISCUIT!!!!!!
select empl_num
from   empl
where  job_title = 'Clerk'
       and salary = max(salary);  -- ILLEGAL! and what would it mean, anyway?
       	              	          --    ...max salary of what rows?

-- ERROR you'll get from the above:
-- ERROR at line 4:
-- ORA-00934: group function is not allowed here
--
-- get it? aggregate == group...

-- you can nest as deeply as you want/need to!
-- well --- why not just show who THAT empl's manager is?
--    (the manager(s) of the highest-paid clerk(s))

-- first: you need the employee number of the clerk's manager,
--    so project that:

select   mgr
from	 empl
where	 job_title = 'Clerk'
         and salary =
	        (select max(salary)
	         from   empl
	         where  job_title = 'Clerk');

-- then, let's get the name(s) of these managers:

select   empl_last_name
from	 empl
where	 empl_num in
         -- "build" the list of desired managers' empl nums
	 (select  mgr
	  from    empl
	  where   job_title = 'Clerk'
	          and salary =
	                (select max(salary)
	                 from	empl
	                 where  job_title = 'Clerk'));

-- lab exercise part 3 - write a query USING A SUB-SELECT that
--    projects the hiredate of the employees whose location
--    is New York

select hiredate
from   empl
where  dept_num in
       (select dept_num
        from   dept
        where  dept_loc = 'New York');

--------
-- you can have a nested select defining a table
--    in your FROM clause, too!
--------

select empl_last_name, dept_name
from   (select *
        from   empl e, dept d
        where  e.dept_num = d.dept_num)
where  dept_name = 'Operations';

select empl_last_name, dept_name
from   (select *
        from   empl e join dept d on e.dept_num = d.dept_num)
where  dept_name = 'Operations';

--------
-- NOTE!!!!
--    the outer select-clause (and where-clause) have to use the "column names"
--    as provided by the result of the from clause
--------

select ename, dname
from   (select empl_last_name ename, dept_name dname
        from empl e, dept d
        where e.dept_num = d.dept_num)
where   dname = 'Operations';

select ename, dname
from   (select empl_last_name ename, dept_name dname
        from empl e join dept d on (e.dept_num = d.dept_num))
where  dname = 'Operations';

-- here's the error you'll get if you don't use those names:

select empl_last_name, dname
from   (select empl_last_name ename, dept_name dname
        from empl e, dept d
        where e.dept_num = d.dept_num)
where   dname = 'Operations';

--------
-- more examples of nested selects in the where-clause
--------

-- who are the managers of clerks making more than the average
-- salary for clerks?

select  empl_last_name
from    empl
where   empl_num in
        (select mgr
         from   empl
         where  job_title = 'Clerk'
                and salary >
                       (select avg(salary)
                        from   empl
                        where  job_title = 'Clerk'));

-- here's one where I might more commonly expect multiple values...
-- what are the names of managers of clerks?

-- what are the employee numbers of clerks?

select   empl_num
from	 empl
where	 job_title = 'Clerk';

-- what are the employee numbers of the MANAGERS of these clerks?

select   mgr
from	 empl
where	 job_title  = 'Clerk';

-- and, what are the empl_last_names of these managers?

-- BAD QUERY -- WON'T WORK:
select   empl_last_name
from	 empl
where	 empl_num =
	 (select mgr
	  from   empl
	  where	 job_title = 'Clerk');

--------
-- IF THE SUBSELECT can result in MORE THAN ONE VALUE,
--    even over time, then the IN operator is SAFER than the = operator!
--------

-- WILL WORK:

select   empl_last_name
from	 empl
where	 empl_num IN
	 (select mgr
	  from   empl
	  where	 job_title = 'Clerk');

----------
-- note: especially when the result columns are all from one table,
-- but the "computation" involves more than one table, there is
-- OFTEN more than one valid way to write a query ---
-- here, then, is a JOIN that gives the names and salaries of
-- employees working in Dallas:

select   empl_last_name, salary
from	 empl, dept
where	 empl.dept_num = dept.dept_num
         and dept_loc = 'Dallas';

select   empl_last_name, salary
from     empl join dept on (empl.dept_num = dept.dept_num)
where    dept_loc = 'Dallas';

-- ...are those parentheses needed around join condition after ON? maybe not:

select   empl_last_name, salary
from     empl join dept on empl.dept_num = dept.dept_num
where    dept_loc = 'Dallas';

-----
-- and queries involving joins can be involved with nested selects, too!
-----

-- what is the manager's name who manages the highest paid
-- clerk, AND that clerk's last name and salary

select e2.empl_last_name "Manager" , e1.empl_last_name "Clerk's name",
       e1.salary "Clerk's salary"
from   empl e1, empl e2
where  e1.mgr = e2.empl_num
       and  e1.job_title = 'Clerk'
       and  e1.salary =
              (select max(salary)
               from   empl
               where  job_title = 'Clerk');

select e2.empl_last_name "Manager" , e1.empl_last_name "Clerk's name",
       e1.salary "Clerk's salary"
from   empl e1 join empl e2 on e1.mgr = e2.empl_num
where  e1.job_title = 'Clerk'
       and e1.salary =
             (select max(salary)
              from   empl
              where  job_title = 'Clerk');

-----
-- more nested examples!
-----

-- give all the employee info for clerks who make MORE than the
-- lowest-paid salesmen

-- what's the salary of the lowest-paid salesman?

select   min(salary)
from	 empl
where	 job_title = 'Salesman';

-- so, what's the info for Clerks who make more than this?

select   *
from	 empl
where    job_title = 'Clerk'
         and salary > 
	        (select min(salary)
	         from   empl
	         where	job_title = 'Salesman');
	       
--------
-- what you can do with a nested select, you can often do
-- with a join, as well --- here's an example:
--------

-- note that, IF a query projects columns from MORE THAN ONE
-- table, a join will HAVE to be at least partially involved;
--
-- what if you want empl_last_name and dept_location of
-- clerks making more than a non-clerk?

select  empl_last_name, dept_loc
from    empl e, dept d
where	e.dept_num = d.dept_num
        and     job_title = 'Clerk'
        and     salary >
	        (select min(salary)
                 from   empl
                 where  job_title != 'Clerk');

select  empl_last_name, dept_loc
from    empl e join dept d on e.dept_num = d.dept_num
where	job_title = 'Clerk'
        and salary >
	    (select min(salary)
             from   empl
             where  job_title != 'Clerk');

-- IF a query projects columns from only one table, but uses
-- information from more than one table to determine what rows
-- to select, then you can often use either a join, a nested select,
-- or some interesting combination thereof...

-- (yes, there are queries where you cannot do a "pure" join,
-- either --- consider the one above, I think. For example, if
-- your query involves those with an attribute more or less than a 
-- minimum, more or less than an average, etc., I believe you'll
-- need at least some nesting to accomplish such a query.) 

------
-- now, SOMETIMES you can get duplicate rows in one version
-- that you DON'T get in another...
------

-- which departments (and their locations) have employees hired
-- before June 1, 1991?

select dept_name, dept_loc
from   dept
where  dept_num in
       (select dept_num
        from   empl
        where  hiredate < '01-JUN-1991');

-- NOTE that these CAN return duplicate rows...

select dept_name, dept_loc
from   dept d, empl e
where  d.dept_num = e.dept_num
       and hiredate < '01-JUN-1991';

select dept_name, dept_loc
from   dept d join empl e on d.dept_num = e.dept_num
where  hiredate < '01-JUN-1991';

-- ...so this is how I could PREVENT those duplicates, of course:

select distinct dept_name, dept_loc
from   dept d, empl e
where  d.dept_num = e.dept_num
       and hiredate < '01-JUN-1991';

select distinct dept_name, dept_loc
from   dept d join empl e on d.dept_num = e.dept_num
where  hiredate < '01-JUN-1991';

--------
--- FUN ASIDE: more interesting projection options: projecting
---    literals, and concatenation
-------

-- Demonstration of putting a literal value into a select clause:
-- shows that you project that value for each row in the result set

-- This sets the "pagesize" of the sqlplus output buffer:
set pagesize 20

select  'b'
from    empl;

-- this looks useless? No, it can be pretty neat, in combination
-- with another feature: you can CONCATENATE two things into 1
-- column with ||:

select dept_num || dept_name
from dept;

-- ugly, right? Ah, but what if you concatenated some spaces
-- and a dash in between:

select dept_num || ' - ' || dept_name "Department"
from dept;

-- pretty nice looking hmm?
-- imagine the possibilities: 
--      last_name || ', ' || first_name
--      city || ', ' || state || '   ' || zip
--      '(' || area_code || ') ' || phone_num
-- or you can create a comma-separated version of data for 
--    a spreadsheet by concatenating commas between values, etc.
-- etc.!

--***********************************************
--- ON to EXISTS!
--**********************************************

-- add a department with no employees

insert into dept
values
('600', 'Computer', 'Arcata');

-- EXISTS is a predicate (like IN, =, <, etc.) ---
--  (Sunderraman) "The exists predicate is true if the sub-select 
--  results in a non-empty set of values, and it is false otherwise."

-- SO: for each row, that row satisfies the EXISTS predicate,
--    and so is selected, if the EXISTS's subquery is non-empty for 
--    that row;
-- when using exists in a where-clause, you select those rows
--    for which the exists' subquery is not empty

-- exists is almost always used with a CORRELATION condition --- with a
-- CORRELATED subquery: when the inner select uses data from the *outer*
-- select. 
--
-- (That is, the inner select is using an attribute or attributes from
-- table(s) included in the from clause of the outer select, that are
-- NOT included in the from clause of the inner select.)

-- Example #1: use EXISTS to list only the locations and
-- names of departments WITH employees
-- 
--!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-- remember that the inner query, with an exists,
-- virtually ALWAYS has a correlation condition!!!!
--!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
--
-- note the inner query is used to see if there
-- are *any* rows matching the criterion - so it
-- doesn't matter what you use in the inner
-- select, and selecting a literal is considered
-- more efficient than, say, select *

-- how this works, roughly: what rows are selected? Those rows for
-- which the inner select is non-empty;
-- here: rows of dept are selected for which there is an employee
-- row with that row's dept_num;

select  dept_loc, dept_name
from    dept
where   exists
        (select         'a'
         from           empl
         where          empl.dept_num = dept.dept_num); 

-- use NOT EXISTS to list which departments
-- currently have NO employees:

select  dept_loc, dept_name
from    dept
where   NOT exists
        (select   'a'
         from     empl
         where    empl.dept_num = dept.dept_num);

-- note the correlation condition is absolutely
-- necessary;
-- the correlation condition is correlating the
-- inner query with the outer query
--
-- Can think of the inner query being executed for each row of
-- the outer query

-- what if we simply omitted the correlation condition?
-- (hint: it is not what we want...)

select  dept_loc, dept_name
from    dept
where   exists
        (select 'a'
         from   empl);

select  dept_loc, dept_name
from    dept
where   NOT exists
        (select   'a'
         from     empl);

-- OOPS, you get ALL the rows, or NONE of them;

-- What happens if you make the inner select
-- a join instead of a simple correlated query?

select  dept_loc, dept_name
from    dept
where   NOT exists
        (select   'a'
        -- NOOOO! You just turned this into a join!!!!
        from      empl, DEPT
        where     empl.dept_num = dept.dept_num);

-- Note that we just got the WRONG result - it
-- says that NO department has no employees
-- (that is, it says all departments have employees
-- which just isn't true)
--
-- What is happening?
-- The inner query is not correlated with the outer
-- query. Each time it fires, it produces exactly
-- the same set of rows. Since it always returns
-- some rows, the NOT EXISTS is never true. So, we
-- get no rows returned from the outer query, which
-- we interpret as "all departments have employees"

select  dept_loc, dept_name
from    dept d1
where   NOT exists
        (select   'a'
         from      empl, DEPT d2
         where     empl.dept_num = d2.dept_num   -- join condition
         and       empl.dept_num = d1.dept_num   -- correlation condition
        );

select  dept_loc, dept_name
from    dept d1
where   NOT exists
        (select 'a'
         from   empl join DEPT d2 on 
                     empl.dept_num = d2.dept_num   -- join condition
         where  empl.dept_num = d1.dept_num   -- correlation condition
        );

--------
-- a BIGGER example, where exists is more "valuable" --
-- 
-- consider for a moment the HW tables -- client, rental, video, etc.
-- what if I'd like to ask: has any client rented BOTH
--    'Gone with the Wind' and 'Star Wars'?

-- one way to ask this: are there any clients for which
--    there EXISTS a rental of 'Gone with the Wind'
--    AND there EXISTS a rental of 'Star Wars'?

-- rentals involve client_nums and vid_ids -- what are the vid_id's for
--    'Gone with the Wind' and 'Star Wars'?

select vid_id
from   movie m, video v
where  m.movie_num = v.movie_num
       and movie_title IN ('Gone with the Wind', 'Star Wars');

select vid_id
from   movie m join video v on m.movie_num = v.movie_num
where  movie_title IN ('Gone with the Wind', 'Star Wars');

-- careful, though -- we want clients who have rented BOTH of these movies,
--    NOT 2 different copies of 'Gone with the Wind'!

-- SO: we want clients for which a 'Gone with the Wind' rental
--    exists, AND for which a 'Star Wars' rental exists!

select client_lname || ', ' || client_fname "Client of interest"
from   client c
where  exists
       (select 'a'
        from   rental r
        where  c.client_num = r.client_num   -- correlation condition
               and vid_id in
                      (select vid_id
                       from   movie m, video v
                       where  m.movie_num = v.movie_num  -- join condition
                              and movie_title = 'Gone with the Wind'))
       and exists
       (select 'a'
        from   rental r
        where  c.client_num = r.client_num   -- correlation condition
               and vid_id in
                     (select vid_id
                      from   movie m, video v     
                      where  m.movie_num = v.movie_num   -- join condition
                             and movie_title = 'Star Wars'));

select client_lname || ', ' || client_fname "Client of interest"
from   client c
where  exists
       (select 'a'
        from   rental r
        where  c.client_num = r.client_num   -- correlation condition
               and vid_id in
                      (select vid_id
                       from   movie m join video v 
                                 on  m.movie_num = v.movie_num  -- join condition
                       where  movie_title = 'Gone with the Wind'))
       and exists
       (select 'a'
        from   rental r
        where  c.client_num = r.client_num   -- correlation condition
               and vid_id in
                     (select vid_id
                      from   movie m join video v
                                on m.movie_num = v.movie_num   -- join condition
                      where  movie_title = 'Star Wars'));


-- double check: what movies HAS Edie Beta rented?

select movie_title
from   movie m, video v, rental r
where  m.movie_num = v.movie_num
       and v.vid_id = r.vid_id
       and client_num IN
           (select client_num
            from   client
            where  client_lname = 'Beta'
                   and client_fname = 'Edie');

select movie_title
from   video v join movie m on v.movie_num = m.movie_num
               join rental r on v.vid_id = r.vid_id
where  client_num IN
           (select client_num
            from   client
            where  client_lname = 'Beta'
                   and client_fname = 'Edie');

-- I want to know the customer's name and the name of their
-- employee rep for customers whose employee rep has made over
-- $1000 in commission;

select  cust_lname || ', ' || cust_fname "Customer", 
        empl_last_name "Customer Rep"
from    customer c, empl e
where   empl_rep = empl_num
        and  exists
             (select 'a'
              from   empl e
              where  c.empl_rep = e.empl_num -- correlation condition
                     and commission > 1000);

select  cust_lname || ', ' || cust_fname "Customer",
        empl_last_name "Customer Rep"
from    customer c join empl e on empl_rep = empl_num
where   exists
        (select 'a'
         from   empl e
         where  c.empl_rep = e.empl_num -- correlation condition
                and commission > 1000);

-----
-- STYLE aside:
-----
-- Because of the way that exists/not exists works, SOMETIMES
-- using it with a Cartesian product (where you really ought to
-- have a join --- that is, include a join condition)  gives a correct
-- answer, IF you use distinct to filter out the MANY excess copies
-- of the desired rows.
--
-- This, however, will be considered POOR STYLE in this class, 
-- and will NOT be accepted for credit. It is just too easy for this
-- kind of approach to lead to incorrect and hard-to-read queries.
--
-- 315 Style (and correctness) rule of thumb: unless you REALLY
-- intend to ask for a Cartesian product, whenever you have N
-- tables in your FROM clause, you need to have (N-1) appropriate
-- corresponding join conditions in your WHERE clause (or in your ON clauses
-- for "new-style" joins)
-----

-----
-- Finally, sometimes a query can be written with
-- IN or with EXISTS or with a JOIN (or with some combination!)
-----
-- The three queries below all correctly answer this
-- question:  
-- List locations with at least one employee hired
-- before May 1, 1991

-- This one uses EXISTS and a correlated subquery:

select  dept_loc
from    dept d
where   exists
        (select 'a'
         from   empl e
         where  e.dept_num = d.dept_num -- correlation condition
                and hiredate < '01-May-1991');

-- This one uses an IN and a subquery:

select  dept_loc
from    dept d
where   dept_num in
        (select dept_num
         from   empl e  
         where  hiredate < '01-May-1991');

-- And these use a join

select  distinct dept_loc
from    dept d, empl e
where   d.dept_num = e.dept_num -- join condition
        and hiredate < '01-May-1991';

select  distinct dept_loc
from    dept d join empl e on d.dept_num = e.dept_num -- join condition
where   hiredate < '01-May-1991';

-- to show why I used DISTINCT above: because there could be 
--    multiple such employees at a particular location...

select  dept_loc
from    dept d, empl e
where   d.dept_num = e.dept_num -- join condition
        and hiredate < '01-May-1991';

select  dept_loc
from    dept d join empl e on d.dept_num = e.dept_num -- join condition
where   hiredate < '01-May-1991';

-- and there may be OTHER reasonable ways to write this query, too! 8-)

spool off

-- end of 315lab06-projected.sql