Випадок #5 — Рішення для Data Mart

https://8weeksqlchallenge.com/case-study-5/

pic

1. Кроки очищення даних

DROP TABLE IF EXISTS datamart.cleanweeklysales;
CREATE TABLE data
mart.cleanweeklysales (
weekdate DATE
, week
number INT
, monthnumber INT
, calendar
year INT
, region NVARCHAR(20)
, platform NVARCHAR(10)
, segment NVARCHAR(10)
, customertype NVARCHAR(10)
, age
band NVARCHAR(15)
, demographic NVARCHAR(10)
, transactions INT
, sales INT
, avg_transactions DECIMAL(27, 2)
);

INSERT INTO datamart.cleanweeklysales
(week
date
, weeknumber
, month
number
, calendaryear
, region
, platform
, segment
, customer
type
, ageband
, demographic
, transactions
, sales
, avg
transactions)
SELECT DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.week
date, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.week
date, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END) AS weekdate
, DATEPART(WEEK, DATEFROMPARTS(CASE
WHEN RIGHT(ws.week
date, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.week
date, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.week
date) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.week
date) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS week
number
, DATEPART(MONTH, DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.week
date, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.week
date, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS monthnumber
, DATEPART(YEAR, DATEFROMPARTS(CASE
WHEN RIGHT(ws.week
date, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.week
date, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.week
date) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.week
date) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS calendar
year
, ws.region AS region
, ws.platform AS platform
, CASE
WHEN ws.segment = ‘null’ THEN ‘unknown’
ELSE ws.segment
END AS segment
, ws.customertype AS customertype
, CASE
WHEN ws.segment <> ‘null’ THEN CASE
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 1 THEN ‘Young Adults’
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 2 THEN ‘Middle Aged’
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 3 OR CONVERT(INT, RIGHT(ws.segment, 1)) = 4 THEN ‘Retirees’
END
WHEN ws.segment = ‘null’ THEN ‘unknown’
END AS ageband
, CASE
WHEN ws.segment <> ‘null’ THEN CASE
WHEN LEFT(ws.segment, 1) = ‘C’ THEN ‘Couples’
WHEN LEFT(ws.segment, 1) = ‘F’ THEN ‘Families’
END
WHEN ws.segment = ‘null’ THEN ‘unknown’
END AS demographic
, ws.transactions AS transactions
, ws.sales AS sales
, CAST(ROUND((ws.sales * 100.00) / (ws.transactions), 2) AS DECIMAL(27, 2)) AS avg
transactions
FROM datamart.weeklysales ws;

SELECT *
FROM datamart.cleanweekly_sales cws;

Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.

pic

2. Дослідження даних

1.
Що за день тижня використовується для кожного значення week_date?

SELECT DATENAME(WEEKDAY, cws.weekdate) AS dayoftheweek
— , COUNT(*)
FROM datamart.cleanweeklysales cws
GROUP BY DATENAME(WEEKDAY, cws.week
date)
ORDER BY DATENAME(WEEKDAY, cws.week_date);

pic

2. Який діапазон номерів тижнів відсутній у наборі даних?

Рішення:

WITH cte
AS
(SELECT 1 AS weeknumber
UNION ALL
SELECT week
number + 1
FROM cte
WHERE weeknumber < 53)
SELECT week
number AS missingweeknumbers
FROM cte
WHERE weeknumber NOT IN (SELECT cws.weeknumber
FROM datamart.cleanweekly_sales cws);

Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.

pic

3. Скільки всього транзакцій було за кожен рік у наборі даних?

Рішення:

SELECT cws.calendaryear AS calendaryear
, SUM(cws.transactions) AS totaltransactions
FROM data
mart.cleanweeklysales cws
GROUP BY cws.calendaryear
ORDER BY cws.calendar
year DESC;

pic

4. Який загальний обсяг продажів для кожного регіону за кожен місяць?

Рішення:

WITH cte
AS
(SELECT cws.region AS region
, cws.monthnumber AS month
, SUM(CONVERT(BIGINT, cws.sales)) AS total
sales
FROM datamart.cleanweeklysales cws
GROUP BY cws.region
, cws.month
number)
SELECT region
, DATENAME(MONTH, DATEADD(MONTH, month, 0) — 1) AS monthname
, total
sales
FROM cte
ORDER BY region ASC
, month ASC;

Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.

pic

5. Яка загальна кількість транзакцій для кожної платформи?

Рішення:

SELECT cws.platform
, SUM(cws.transactions) AS totaltransactions
FROM data
mart.cleanweeklysales cws
GROUP BY cws.platform
ORDER BY cws.platform ASC;

pic

6. Який відсоток продажів для Retail порівняно з Shopify за кожен місяць?

Рішення:

— cte1: Обчислення загальних місячних продажів для кожного календарного року окремо

WITH cte1
AS
(SELECT cws.calendaryear
, cws.month
number
, SUM(CONVERT(BIGINT, cws.sales)) AS totalmonthlysales
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendar
year
, cws.month_number),

— cte2: Обчислення місячних продажів по платформах для кожного календарного року окремо

cte2
AS
(SELECT cws.platform
, cws.calendaryear
, cws.month
number
, SUM(CONVERT(BIGINT, cws.sales)) AS salesbyplatform
FROM datamart.cleanweeklysales cws
GROUP BY cws.platform
, cws.calendar
year
, cws.month_number)

SELECT pvt.calendaryear
, DATENAME(MONTH, DATEADD(MONTH, pvt.month
number, 0) — 1) AS calendarmonth
, pvt.Retail
, pvt.Shopify
FROM (SELECT cte2.calendar
year
, cte2.monthnumber
, platform
, CAST((cte2.sales
byplatform * 100.00 / cte1.totalmonthlysales) AS DECIMAL(38, 2)) AS platformwisemonthlysalespercentage
FROM cte1
JOIN cte2 ON cte1.calendar
year = cte2.calendaryear
AND cte1.month
number = cte2.monthnumber) AS finalresult

PIVOT (SUM(finalresult.platformwisemonthlysalespercentage)
FOR final
result.platform IN ([Retail], [Shopify])) AS pvt
ORDER BY pvt.calendaryear DESC
, pvt.month
number DESC;

Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.

pic

7.

Який відсоток продажів за демографічними групами для кожного року в наборі даних?

Рішення:

WITH cte1
AS
(SELECT cws.calendaryear
, SUM(CONVERT(BIGINT, cws.sales)) AS total
salesincalendaryear
FROM data
mart.cleanweeklysales cws
GROUP BY cws.calendar_year),

cte2
AS
(SELECT cws.calendaryear
, cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS total
salesbydemographic
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendar
year
, cws.demographic)

SELECT pvt.calendaryear
, pvt.Couples
, pvt.Families
, pvt.unknown
FROM (SELECT cte2.calendar
year
, demographic
, CAST(((cte2.totalsalesbydemographic * 100.00) / cte1.totalsalesincalendaryear) AS DECIMAL(38, 2)) AS percentageofsalesbydemographic
FROM cte1
JOIN cte2 ON cte1.calendar
year = cte2.calendaryear) AS finalresult
PIVOT (SUM(finalresult.percentageofsalesbydemographic) FOR finalresult.demographic IN ([Couples], [Families], [unknown])) AS pvt
ORDER BY pvt.calendar_year DESC;

pic

8. Які значення age_band та demographic найбільше сприяють продажам Retail?

Рішення:

SELECT TOP 1 A.ageband
, A.demographic
FROM (SELECT cws.age
band
, cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS sales
FROM datamart.cleanweeklysales cws
GROUP BY cws.age
band
, cws.demographic) AS A
ORDER BY A.sales DESC;

pic

9. Чи можемо ми використовувати стовпець avg_transaction для обчислення середнього розміру транзакції за кожен рік для Retail та Shopify?

Якщо ні — як би ви це обчислювали?

Рішення:

SELECT pvt.calendaryear
, pvt.Retail
, pvt.Shopify
FROM (SELECT cws.calendar
year
, cws.platform
, CAST(AVG(cws.avgtransactions) AS DECIMAL(38, 2)) AS avgtransactionsize
FROM data
mart.cleanweeklysales cws
GROUP BY cws.calendaryear
, cws.platform) AS t
PIVOT (SUM(t.avg
transactionsize) FOR t.platform IN ([Retail], [Shopify])) AS pvt
ORDER BY pvt.calendar
year ASC;

pic

3. Аналіз "До та Після"

1. Які загальні продажі були за 4 тижні до та після 2020-06-15?

Який темп росту або скорочення в абсолютних значеннях та відсоткових змінах продажів?

Рішення:

WITH cte1
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales4wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date >= CAST(DATEADD(WEEK, -4, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’),

cte2
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales4wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 4, ‘2020–06–15’) AS DATE))

SELECT totalsales4wksbefore20200615
, totalsales4wksafter20200615
, (totalsales4wksafter20200615 — totalsales4wksbefore20200615) AS absdifferenceinsales
, CAST(((total
sales4wksafter20200615 — totalsales4wksbefore20200615) * 100.00) / (totalsales4wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
, cte2;

pic

2.

Що щодо всіх 12 тижнів до та після?

Рішення:

WITH cte1
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’),

cte2
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE))

SELECT totalsales12wksbefore20200615
, totalsales12wksafter20200615
, (totalsales12wksafter20200615 — totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((total
sales12wksafter20200615 — totalsales12wksbefore20200615) * 100.00) / (totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
, cte2;

pic

4. Бонусне питання

1.
Які області бізнесу мають найбільший негативний вплив на показники продажів у 2020 році для періоду 12 тижнів до та після?

— регіон
— платформа
— вікова група (ageband)
— демографія (demographic)
— тип клієнта (customer
type)

Рішення:

регіон:

WITH cte1
AS
(SELECT cws.region
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.region),

cte2
AS
(SELECT cws.region
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.region)

SELECT TOP 1 cte1.region
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.total
sales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.region = cte2.region
ORDER BY percentagechangein_sales ASC;

pic

платформа:

WITH cte1
AS
(SELECT cws.platform
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.platform),

cte2
AS
(SELECT cws.platform
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.platform)

SELECT TOP 1 cte1.platform
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.total
sales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.platform = cte2.platform
ORDER BY percentagechangein_sales ASC;

pic

вікова група (age_band):

WITH cte1
AS
(SELECT cws.ageband
, SUM(CONVERT(BIGINT, cws.sales)) AS total
sales12wksbefore20200615
FROM data
mart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week
date < ‘2020–06–15’
GROUP BY cws.age_band),

cte2
AS
(SELECT cws.ageband
, SUM(CONVERT(BIGINT, cws.sales)) AS total
sales12wksafter20200615
FROM data
mart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.age
band)

SELECT TOP 1 cte1.ageband
, cte1.total
sales12wksbefore20200615
, cte2.total
sales12wksafter20200615
, (cte2.total
sales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.age
band = cte2.ageband
ORDER BY percentage
changeinsales ASC;

pic

демографія (demographic):

WITH cte1
AS
(SELECT cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.demographic),

cte2
AS
(SELECT cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.week
date BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.demographic)

SELECT TOP 1 cte1.demographic
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.total
sales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.demographic = cte2.demographic
ORDER BY percentagechangein_sales ASC;

pic

тип клієнта (customer_type):

WITH cte1
AS
(SELECT cws.customertype
, SUM(CONVERT(BIGINT, cws.sales)) AS total
sales12wksbefore20200615
FROM data
mart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week
date < ‘2020–06–15’
GROUP BY cws.customer_type),

cte2
AS
(SELECT cws.customertype
, SUM(CONVERT(BIGINT, cws.sales)) AS total
sales12wksafter20200615
FROM data
mart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.customer
type)

SELECT TOP 1 cte1.customertype
, cte1.total
sales12wksbefore20200615
, cte2.total
sales12wksafter20200615
, (cte2.total
sales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.customer
type = cte2.customertype
ORDER BY percentage
changeinsales ASC;

pic

Перекладено з: Case Study #5 — Data Mart — Solutions

Leave a Reply

Your email address will not be published. Required fields are marked *