https://8weeksqlchallenge.com/case-study-5/
1. Кроки очищення даних
DROP TABLE IF EXISTS datamart.cleanweeklysales;
CREATE TABLE datamart.cleanweeklysales (
weekdate DATE
, weeknumber INT
, monthnumber INT
, calendaryear INT
, region NVARCHAR(20)
, platform NVARCHAR(10)
, segment NVARCHAR(10)
, customertype NVARCHAR(10)
, ageband NVARCHAR(15)
, demographic NVARCHAR(10)
, transactions INT
, sales INT
, avg_transactions DECIMAL(27, 2)
);
INSERT INTO datamart.cleanweeklysales
(weekdate
, weeknumber
, monthnumber
, calendaryear
, region
, platform
, segment
, customertype
, ageband
, demographic
, transactions
, sales
, avgtransactions)
SELECT DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END) AS weekdate
, DATEPART(WEEK, DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS weeknumber
, DATEPART(MONTH, DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS monthnumber
, DATEPART(YEAR, DATEFROMPARTS(CASE
WHEN RIGHT(ws.weekdate, 2) = ‘20’ THEN 2020
WHEN RIGHT(ws.weekdate, 2) = ‘19’ THEN 2019
WHEN RIGHT(ws.weekdate, 2) = ‘18’ THEN 2018
END
, CONVERT(INT, LEFT(RIGHT(ws.weekdate, 4), 1))
, CASE
WHEN CHARINDEX(‘/’, ws.weekdate) = 2 THEN CONVERT(INT, LEFT(ws.weekdate, 1))
WHEN CHARINDEX(‘/’, ws.weekdate) = 3 THEN CONVERT(INT, LEFT(ws.weekdate, 2))
END)) AS calendaryear
, ws.region AS region
, ws.platform AS platform
, CASE
WHEN ws.segment = ‘null’ THEN ‘unknown’
ELSE ws.segment
END AS segment
, ws.customertype AS customertype
, CASE
WHEN ws.segment <> ‘null’ THEN CASE
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 1 THEN ‘Young Adults’
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 2 THEN ‘Middle Aged’
WHEN CONVERT(INT, RIGHT(ws.segment, 1)) = 3 OR CONVERT(INT, RIGHT(ws.segment, 1)) = 4 THEN ‘Retirees’
END
WHEN ws.segment = ‘null’ THEN ‘unknown’
END AS ageband
, CASE
WHEN ws.segment <> ‘null’ THEN CASE
WHEN LEFT(ws.segment, 1) = ‘C’ THEN ‘Couples’
WHEN LEFT(ws.segment, 1) = ‘F’ THEN ‘Families’
END
WHEN ws.segment = ‘null’ THEN ‘unknown’
END AS demographic
, ws.transactions AS transactions
, ws.sales AS sales
, CAST(ROUND((ws.sales * 100.00) / (ws.transactions), 2) AS DECIMAL(27, 2)) AS avgtransactions
FROM datamart.weeklysales ws;
SELECT *
FROM datamart.cleanweekly_sales cws;
Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.
2. Дослідження даних
1.
Що за день тижня використовується для кожного значення week_date?
SELECT DATENAME(WEEKDAY, cws.weekdate) AS dayoftheweek
— , COUNT(*)
FROM datamart.cleanweeklysales cws
GROUP BY DATENAME(WEEKDAY, cws.weekdate)
ORDER BY DATENAME(WEEKDAY, cws.week_date);
2. Який діапазон номерів тижнів відсутній у наборі даних?
Рішення:
WITH cte
AS
(SELECT 1 AS weeknumber
UNION ALL
SELECT weeknumber + 1
FROM cte
WHERE weeknumber < 53)
SELECT weeknumber AS missingweeknumbers
FROM cte
WHERE weeknumber NOT IN (SELECT cws.weeknumber
FROM datamart.cleanweekly_sales cws);
Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.
3. Скільки всього транзакцій було за кожен рік у наборі даних?
Рішення:
SELECT cws.calendaryear AS calendaryear
, SUM(cws.transactions) AS totaltransactions
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendaryear
ORDER BY cws.calendaryear DESC;
4. Який загальний обсяг продажів для кожного регіону за кожен місяць?
Рішення:
WITH cte
AS
(SELECT cws.region AS region
, cws.monthnumber AS month
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales
FROM datamart.cleanweeklysales cws
GROUP BY cws.region
, cws.monthnumber)
SELECT region
, DATENAME(MONTH, DATEADD(MONTH, month, 0) — 1) AS monthname
, totalsales
FROM cte
ORDER BY region ASC
, month ASC;
Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.
5. Яка загальна кількість транзакцій для кожної платформи?
Рішення:
SELECT cws.platform
, SUM(cws.transactions) AS totaltransactions
FROM datamart.cleanweeklysales cws
GROUP BY cws.platform
ORDER BY cws.platform ASC;
6. Який відсоток продажів для Retail порівняно з Shopify за кожен місяць?
Рішення:
— cte1: Обчислення загальних місячних продажів для кожного календарного року окремо
WITH cte1
AS
(SELECT cws.calendaryear
, cws.monthnumber
, SUM(CONVERT(BIGINT, cws.sales)) AS totalmonthlysales
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendaryear
, cws.month_number),
— cte2: Обчислення місячних продажів по платформах для кожного календарного року окремо
cte2
AS
(SELECT cws.platform
, cws.calendaryear
, cws.monthnumber
, SUM(CONVERT(BIGINT, cws.sales)) AS salesbyplatform
FROM datamart.cleanweeklysales cws
GROUP BY cws.platform
, cws.calendaryear
, cws.month_number)
SELECT pvt.calendaryear
, DATENAME(MONTH, DATEADD(MONTH, pvt.monthnumber, 0) — 1) AS calendarmonth
, pvt.Retail
, pvt.Shopify
FROM (SELECT cte2.calendaryear
, cte2.monthnumber
, platform
, CAST((cte2.salesbyplatform * 100.00 / cte1.totalmonthlysales) AS DECIMAL(38, 2)) AS platformwisemonthlysalespercentage
FROM cte1
JOIN cte2 ON cte1.calendaryear = cte2.calendaryear
AND cte1.monthnumber = cte2.monthnumber) AS finalresult
PIVOT (SUM(finalresult.platformwisemonthlysalespercentage)
FOR finalresult.platform IN ([Retail], [Shopify])) AS pvt
ORDER BY pvt.calendaryear DESC
, pvt.monthnumber DESC;
Приклад вихідних даних надано; повний набір даних не показано через обмеження по місцю.
7.
Який відсоток продажів за демографічними групами для кожного року в наборі даних?
Рішення:
WITH cte1
AS
(SELECT cws.calendaryear
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsalesincalendaryear
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendar_year),
cte2
AS
(SELECT cws.calendaryear
, cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsalesbydemographic
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendaryear
, cws.demographic)
SELECT pvt.calendaryear
, pvt.Couples
, pvt.Families
, pvt.unknown
FROM (SELECT cte2.calendaryear
, demographic
, CAST(((cte2.totalsalesbydemographic * 100.00) / cte1.totalsalesincalendaryear) AS DECIMAL(38, 2)) AS percentageofsalesbydemographic
FROM cte1
JOIN cte2 ON cte1.calendaryear = cte2.calendaryear) AS finalresult
PIVOT (SUM(finalresult.percentageofsalesbydemographic) FOR finalresult.demographic IN ([Couples], [Families], [unknown])) AS pvt
ORDER BY pvt.calendar_year DESC;
8. Які значення age_band та demographic найбільше сприяють продажам Retail?
Рішення:
SELECT TOP 1 A.ageband
, A.demographic
FROM (SELECT cws.ageband
, cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS sales
FROM datamart.cleanweeklysales cws
GROUP BY cws.ageband
, cws.demographic) AS A
ORDER BY A.sales DESC;
9. Чи можемо ми використовувати стовпець avg_transaction для обчислення середнього розміру транзакції за кожен рік для Retail та Shopify?
Якщо ні — як би ви це обчислювали?
Рішення:
SELECT pvt.calendaryear
, pvt.Retail
, pvt.Shopify
FROM (SELECT cws.calendaryear
, cws.platform
, CAST(AVG(cws.avgtransactions) AS DECIMAL(38, 2)) AS avgtransactionsize
FROM datamart.cleanweeklysales cws
GROUP BY cws.calendaryear
, cws.platform) AS t
PIVOT (SUM(t.avgtransactionsize) FOR t.platform IN ([Retail], [Shopify])) AS pvt
ORDER BY pvt.calendaryear ASC;
3. Аналіз "До та Після"
1. Які загальні продажі були за 4 тижні до та після 2020-06-15?
Який темп росту або скорочення в абсолютних значеннях та відсоткових змінах продажів?
Рішення:
WITH cte1
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales4wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -4, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’),
cte2
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales4wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 4, ‘2020–06–15’) AS DATE))
SELECT totalsales4wksbefore20200615
, totalsales4wksafter20200615
, (totalsales4wksafter20200615 — totalsales4wksbefore20200615) AS absdifferenceinsales
, CAST(((totalsales4wksafter20200615 — totalsales4wksbefore20200615) * 100.00) / (totalsales4wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
, cte2;
2.
Що щодо всіх 12 тижнів до та після?
Рішення:
WITH cte1
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’),
cte2
AS
(SELECT SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE))
SELECT totalsales12wksbefore20200615
, totalsales12wksafter20200615
, (totalsales12wksafter20200615 — totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((totalsales12wksafter20200615 — totalsales12wksbefore20200615) * 100.00) / (totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
, cte2;
4. Бонусне питання
1.
Які області бізнесу мають найбільший негативний вплив на показники продажів у 2020 році для періоду 12 тижнів до та після?
— регіон
— платформа
— вікова група (ageband)
— демографія (demographic)
— тип клієнта (customertype)
Рішення:
регіон:
WITH cte1
AS
(SELECT cws.region
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.region),
cte2
AS
(SELECT cws.region
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.region)
SELECT TOP 1 cte1.region
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.region = cte2.region
ORDER BY percentagechangein_sales ASC;
платформа:
WITH cte1
AS
(SELECT cws.platform
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.platform),
cte2
AS
(SELECT cws.platform
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.platform)
SELECT TOP 1 cte1.platform
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.platform = cte2.platform
ORDER BY percentagechangein_sales ASC;
вікова група (age_band):
WITH cte1
AS
(SELECT cws.ageband
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.weekdate < ‘2020–06–15’
GROUP BY cws.age_band),
cte2
AS
(SELECT cws.ageband
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.ageband)
SELECT TOP 1 cte1.ageband
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.ageband = cte2.ageband
ORDER BY percentagechangeinsales ASC;
демографія (demographic):
WITH cte1
AS
(SELECT cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.week_date < ‘2020–06–15’
GROUP BY cws.demographic),
cte2
AS
(SELECT cws.demographic
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.demographic)
SELECT TOP 1 cte1.demographic
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.demographic = cte2.demographic
ORDER BY percentagechangein_sales ASC;
тип клієнта (customer_type):
WITH cte1
AS
(SELECT cws.customertype
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksbefore20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate >= CAST(DATEADD(WEEK, -12, ‘2020–06–15’) AS DATE)
AND cws.weekdate < ‘2020–06–15’
GROUP BY cws.customer_type),
cte2
AS
(SELECT cws.customertype
, SUM(CONVERT(BIGINT, cws.sales)) AS totalsales12wksafter20200615
FROM datamart.cleanweeklysales cws
WHERE cws.weekdate BETWEEN ‘2020–06–15’ AND CAST(DATEADD(WEEK, 12, ‘2020–06–15’) AS DATE)
GROUP BY cws.customertype)
SELECT TOP 1 cte1.customertype
, cte1.totalsales12wksbefore20200615
, cte2.totalsales12wksafter20200615
, (cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) AS absdifferenceinsales
, CAST(((cte2.totalsales12wksafter20200615 — cte1.totalsales12wksbefore20200615) * 100.00) / (cte1.totalsales12wksbefore20200615) AS DECIMAL(38, 2)) AS percentagechangeinsales
FROM cte1
JOIN cte2 ON cte1.customertype = cte2.customertype
ORDER BY percentagechangeinsales ASC;
Перекладено з: Case Study #5 — Data Mart — Solutions