diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md index 5cbb4e70f..a86b4af2b 100644 --- a/02_activities/assignments/Assignment2.md +++ b/02_activities/assignments/Assignment2.md @@ -54,7 +54,10 @@ The store wants to keep customer addresses. Propose two architectures for the CU **HINT:** search type 1 vs type 2 slowly changing dimensions. ``` -Your answer... +type 1 = overwrite +-- Updates the dimension record with the new value, overwriting the old value, does not save the old values +type 2 = keep full history +-- Inserts a new record for each change, preserving the old one. Full history retained ``` *** diff --git a/02_activities/assignments/Two Entity-Relationship Diagrams.png b/02_activities/assignments/Two Entity-Relationship Diagrams.png new file mode 100644 index 000000000..5030298eb Binary files /dev/null and b/02_activities/assignments/Two Entity-Relationship Diagrams.png differ diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql index 5ad40748a..0eb3f2214 100644 --- a/02_activities/assignments/assignment2.sql +++ b/02_activities/assignments/assignment2.sql @@ -7,8 +7,8 @@ We tell them, no problem! We can produce a list with all of the appropriate deta Using the following syntax you create our super cool and not at all needy manager a list: -SELECT -product_name || ', ' || product_size|| ' (' || product_qty_type || ')' +SELECT +product_name || ', ' || product_size|| ' (' || product_qty_type || ')' FROM product But wait! The product table has some bad data (a few NULL values). @@ -19,9 +19,23 @@ HINT: keep the syntax the same, but edited the correct components with the strin The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. All the other rows will remain the same.) */ - - - + +SELECT +product_name || ', ' || product_size|| ' (' || product_qty_type || ')' +FROM product; + +SELECT * +FROM product +WHERE product_size IS NULL + OR product_qty_type IS NULL + OR product_name is NULL; + +SELECT + product_name || ', ' || + COALESCE(product_size, '') || ' (' || + COALESCE(product_qty_type, 'unit') || ')' +FROM product; + --Windowed Functions /* 1. Write a query that selects from the customer_purchases table and numbers each customer’s visits to the farmer’s market (labeling each market date with a different number). @@ -32,18 +46,88 @@ each new market date for each customer, or select only the unique market dates p (without purchase details) and number those visits. HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */ - +SELECT + customer_id, + market_date, + ROW_NUMBER() OVER ( + PARTITION BY customer_id + ORDER BY market_date) + AS visit_number +FROM customer_purchases; + +SELECT + customer_id, + market_date, + DENSE_RANK() OVER ( + PARTITION BY customer_id + ORDER BY market_date) + AS visit_number +FROM (SELECT DISTINCT customer_id, market_date + FROM customer_purchases); /* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */ - +SELECT + customer_id, + market_date, + ROW_NUMBER() OVER ( + PARTITION BY customer_id + ORDER BY market_date DESC + ) AS visit_number +FROM customer_purchases; + +SELECT + customer_id, + market_date, + DENSE_RANK() OVER ( + PARTITION BY customer_id + ORDER BY market_date DESC + ) AS visit_number +FROM ( + SELECT DISTINCT customer_id, market_date + FROM customer_purchases +); + +SELECT * +FROM ( + SELECT + customer_id, + market_date, + ROW_NUMBER() OVER ( + PARTITION BY customer_id + ORDER BY market_date DESC + ) AS visit_number + FROM customer_purchases +) AS ranked +WHERE visit_number = 1; + +SELECT * +FROM ( + SELECT + customer_id, + market_date, + DENSE_RANK() OVER ( + PARTITION BY customer_id + ORDER BY market_date DESC + ) AS visit_number + FROM ( + SELECT DISTINCT customer_id, market_date + FROM customer_purchases + ) +) AS ranked +WHERE visit_number = 1; /* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */ - +SELECT + *, + COUNT(*) OVER ( + PARTITION BY customer_id, product_id + ) AS purchase_count +FROM customer_purchases; -- String manipulations /* 1. Some product names in the product table have descriptions like "Jar" or "Organic". @@ -56,11 +140,33 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for | Habanero Peppers - Organic | Organic | Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */ - + +SELECT + product_name, + TRIM( + SUBSTR( + product_name, + INSTR(product_name, '-') + 1 + ) + ) AS description +FROM product; + +SELECT + product_name, + CASE + WHEN INSTR(product_name, '-') > 0 THEN + TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1)) + ELSE + NULL + END AS description +FROM product; /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */ +SELECT * +FROM product +WHERE product_size REGEXP '[0-9]'; -- UNION @@ -72,8 +178,21 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling "best day" and "worst day"; 3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them. */ - - + +WITH sales_by_date AS ( + SELECT + market_date, + SUM(quantity * cost_to_customer_per_qty) AS total_sales + FROM customer_purchases + GROUP BY market_date +) +SELECT market_date, total_sales +FROM sales_by_date +WHERE total_sales = (SELECT MAX(total_sales) FROM sales_by_date) +UNION +SELECT market_date, total_sales +FROM sales_by_date +WHERE total_sales = (SELECT MIN(total_sales) FROM sales_by_date); /* SECTION 3 */ @@ -89,27 +208,57 @@ Think a bit about the row counts: how many distinct vendors, product names are t How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */ - - +SELECT + v.vendor_name, + p.product_name, + COUNT(c.customer_id) * 5 * vi.original_price AS total_revenue +FROM vendor_inventory vi +JOIN vendor v + ON vi.vendor_id = v.vendor_id +JOIN product p + ON vi.product_id = p.product_id +CROSS JOIN customer c +GROUP BY v.vendor_name, p.product_name; + -- INSERT /*1. Create a new table "product_units". This table will contain only products where the `product_qty_type = 'unit'`. It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. Name the timestamp column `snapshot_timestamp`. */ - - + +CREATE TABLE product_units AS +SELECT + p.*, + CURRENT_TIMESTAMP AS snapshot_timestamp +FROM product p +WHERE product_qty_type = 'unit'; /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). */ - +INSERT INTO product_units +SELECT + product_id, + product_name, + product_size, + product_category_id, + product_qty_type, + CURRENT_TIMESTAMP as snapshot_timestamp +FROM product +WHERE product_name = 'Apple Pie'; -- DELETE /* 1. Delete the older record for the whatever product you added. HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/ - +DELETE FROM product_units +WHERE product_name = 'Apple Pie' +AND snapshot_timestamp = ( + SELECT min(snapshot_timestamp) + FROM product_units + WHERE product_name = 'Apple Pie' +); -- UPDATE /* 1.We want to add the current_quantity to the product_units table. @@ -128,6 +277,45 @@ Finally, make sure you have a WHERE statement to update the right row, you'll need to use product_units.product_id to refer to the correct row within the product_units table. When you have all of these components, you can run the update statement. */ - - - +ALTER TABLE product_units +ADD current_quantity INT; +UPDATE product_units +SET current_quantity = ( + SELECT vi.quantity + FROM vendor_inventory vi + WHERE vi.product_id = product_units.product_id + ORDER BY vi.rowid DESC + LIMIT 1 +); + + +SELECT product_id, quantity +FROM vendor_inventory vi +WHERE rowid = ( + SELECT MAX(rowid) + FROM vendor_inventory + WHERE product_id = vi.product_id +); + +SELECT pu.product_id, + COALESCE(( + SELECT vi.quantity + FROM vendor_inventory vi + WHERE vi.product_id = pu.product_id + ORDER BY vi.rowid DESC + LIMIT 1 + ), 0) AS last_quantity +FROM product_units pu; + +UPDATE product_units +SET current_quantity = COALESCE(( + SELECT vi.quantity + FROM vendor_inventory vi + WHERE vi.product_id = product_units.product_id + ORDER BY vi.rowid DESC + LIMIT 1 + ), 0) +WHERE product_id IN ( + SELECT DISTINCT product_id + FROM vendor_inventory +); \ No newline at end of file