βIn this Demo, we will
cost-and-usage-reports-239691
detailed-cost-analysis-report
cost-and-usage-reports
CREATE DATABASE IF NOT EXISTS detailed_cost_analysis_report
CREATE EXTERNAL TABLE IF NOT EXISTS detailed_cost_analysis_report.detailed_cost_analysis_report (
identity_line_item_id STRING, identity_time_interval STRING, bill_invoice_id STRING, bill_billing_entity STRING, bill_bill_type STRING,
bill_payer_account_id STRING, bill_billing_period_start_date TIMESTAMP, bill_billing_period_end_date TIMESTAMP,
line_item_usage_account_id STRING, line_item_line_item_type STRING, line_item_usage_start_date TIMESTAMP, line_item_usage_end_date TIMESTAMP,
line_item_product_code STRING, line_item_usage_type STRING, line_item_operation STRING, line_item_availability_zone STRING,
line_item_resource_id STRING, line_item_usage_amount DOUBLE, line_item_normalization_factor DOUBLE, line_item_normalized_usage_amount DOUBLE,
line_item_currency_code STRING, line_item_unblended_rate STRING, line_item_unblended_cost DOUBLE, line_item_blended_rate STRING,
line_item_blended_cost DOUBLE, line_item_line_item_description STRING, line_item_tax_type STRING, line_item_legal_entity STRING,
product_product_name STRING, product_availability STRING, product_availability_zone STRING, product_capacitystatus STRING,
product_classicnetworkingsupport STRING, product_clock_speed STRING, product_cloudformationresource_provider STRING,
product_current_generation STRING, product_durability STRING, product_ecu STRING, product_enhanced_networking_supported STRING,
product_from_location STRING, product_from_location_type STRING, product_from_region_code STRING, product_gpu_memory STRING,
product_group STRING, product_group_description STRING, product_instance_family STRING, product_instance_type STRING,
product_instance_type_family STRING, product_intel_avx2_available STRING, product_intel_avx_available STRING,
product_intel_turbo_available STRING, product_license_model STRING, product_location STRING, product_location_type STRING,
product_logs_destination STRING, product_marketoption STRING, product_memory STRING, product_network_performance STRING,
product_normalization_size_factor STRING, product_operating_system STRING, product_operation STRING, product_physical_processor STRING,
product_pre_installed_sw STRING, product_processor_architecture STRING, product_processor_features STRING, product_product_family STRING,
product_region STRING, product_region_code STRING, product_servicecode STRING, product_servicename STRING, product_sku STRING,
product_steps STRING, product_storage STRING, product_storage_class STRING, product_storage_media STRING, product_tenancy STRING,
product_to_location STRING, product_to_location_type STRING, product_to_region_code STRING, product_transfer_type STRING,
product_usagetype STRING, product_vcpu STRING, product_version STRING, product_volume_type STRING, product_vpcnetworkingsupport STRING,
pricing_rate_code STRING, pricing_rate_id STRING, pricing_currency STRING, pricing_public_on_demand_cost DOUBLE,
pricing_public_on_demand_rate STRING, pricing_term STRING, pricing_unit STRING,
reservation_amortized_upfront_cost_for_usage DOUBLE, reservation_amortized_upfront_fee_for_billing_period DOUBLE,
reservation_effective_cost DOUBLE, reservation_end_time STRING, reservation_modification_status STRING,
reservation_normalized_units_per_reservation STRING, reservation_number_of_reservations STRING, reservation_recurring_fee_for_usage DOUBLE,
reservation_start_time STRING, reservation_subscription_id STRING, reservation_total_reserved_normalized_units STRING,
reservation_total_reserved_units STRING, reservation_units_per_reservation STRING,
reservation_unused_amortized_upfront_fee_for_billing_period DOUBLE, reservation_unused_normalized_unit_quantity DOUBLE,
reservation_unused_quantity DOUBLE, reservation_unused_recurring_fee DOUBLE, reservation_upfront_value DOUBLE,
savings_plan_total_commitment_to_date DOUBLE, savings_plan_savings_plan_a_r_n STRING, savings_plan_savings_plan_rate DOUBLE,
savings_plan_used_commitment DOUBLE, savings_plan_savings_plan_effective_cost DOUBLE,
savings_plan_amortized_upfront_commitment_for_billing_period DOUBLE, savings_plan_recurring_commitment_for_billing_period DOUBLE
)
PARTITIONED BY (year STRING, month STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1')
LOCATION 's3://cost-and-usage-reports-239691/cost-and-usage-reports/detailed-cost-analysis-report/detailed-cost-analysis-report/';
MSCK REPAIR TABLE detailed_cost_analysis_report.detailed_cost_analysis_report;
SHOW PARTITIONS detailed_cost_analysis_report.detailed_cost_analysis_report;
SHOW CREATE TABLE detailed_cost_analysis_report.detailed_cost_analysis_report;
-- Query 1: Daily cost by service with service name
-- Select statement to analyze daily AWS service costs
-- Extract date from timestamp and alias as usage_date
SELECT
DATE(line_item_usage_start_date) as usage_date,
-- Get the AWS service code (e.g. AmazonEC2, AmazonS3)
line_item_product_code,
-- Get the human-readable AWS service name
product_product_name,
-- Sum the unblended cost and round to 2 decimal places
ROUND(SUM(line_item_unblended_cost), 2) as daily_cost,
-- Get the currency used for billing
pricing_currency as currency
-- Specify the table containing the cost and usage data
FROM detailed_cost_analysis_report.detailed_cost_analysis_report
-- Filter for usage in the last 30 days
WHERE line_item_usage_start_date >= current_date - interval '30' day
-- Only include records with actual costs (greater than 0)
AND line_item_unblended_cost > 0
-- Group by date, service code, service name and currency (using column positions)
GROUP BY 1, 2, 3, 5
-- Sort by date and cost (highest cost first)
ORDER BY 1, 4 DESC;
-- Query 2: Top 10 costly resources with resource description and region
-- Select statement to identify the most expensive AWS resources
-- Get the unique AWS resource identifier
SELECT
line_item_resource_id,
-- Get the AWS service code (e.g. AmazonEC2, AmazonS3)
line_item_product_code,
-- Get the human-readable AWS service name
product_product_name,
-- Get the detailed description of the resource usage
line_item_line_item_description as resource_description,
-- Get the AWS region where resource is deployed
product_region,
-- Sum the unblended cost and round to 2 decimal places
ROUND(SUM(line_item_unblended_cost), 2) as total_cost,
-- Get the currency used for billing
pricing_currency as currency
-- Specify the table containing the cost and usage data
FROM detailed_cost_analysis_report.detailed_cost_analysis_report
-- Filter out entries with empty resource IDs
WHERE line_item_resource_id != ''
-- Filter for usage in the last 30 days
AND line_item_usage_start_date >= current_date - interval '30' day
-- Only include records with actual costs (greater than 0)
AND line_item_unblended_cost > 0
-- Group by resource ID, service details, description, region and currency
GROUP BY 1, 2, 3, 4, 5, 7
-- Sort by total cost in descending order (highest first)
ORDER BY total_cost DESC
-- Limit to top 10 most expensive resources
LIMIT 10;
-- Query 3: EC2 daily costs with instance types
-- Select statement to analyze daily EC2 instance costs by type and region
-- Extract date from timestamp and alias as usage_date
SELECT
DATE(line_item_usage_start_date) as usage_date,
-- Get the EC2 instance type (e.g. t2.micro, m5.large)
product_instance_type,
-- Get the AWS region where instances are running
product_region,
-- Sum the unblended cost and round to 2 decimal places
ROUND(SUM(line_item_unblended_cost), 2) as daily_cost,
-- Get the currency used for billing
pricing_currency as currency
-- Specify the table containing the cost and usage data
FROM detailed_cost_analysis_report.detailed_cost_analysis_report
-- Filter for only EC2 service costs
WHERE line_item_product_code = 'AmazonEC2'
-- Filter for usage in the last 30 days
AND line_item_usage_start_date >= current_date - interval '30' day
-- Only include records with actual costs (greater than 0)
AND line_item_unblended_cost > 0
-- Group by date, instance type, region and currency
GROUP BY 1, 2, 3, 5
-- Sort by date and cost (highest cost first)
ORDER BY usage_date, daily_cost DESC;
-- Total cost by service with product details and usage metrics
-- Select statement to analyze total AWS service costs with detailed metrics
-- Get the AWS service code
SELECT line_item_product_code,
-- Get the human-readable AWS service name
product_product_name,
-- Get the AWS region for the service
product_region,
-- Sum the unblended cost and round to 2 decimal places
ROUND(SUM(line_item_unblended_cost), 2) as total_cost,
-- Get the currency code used for billing
line_item_currency_code as currency,
-- Count unique resources used within each service
COUNT(DISTINCT line_item_resource_id) as resource_count,
-- Count unique AWS accounts using each service
COUNT(DISTINCT line_item_usage_account_id) as account_count,
-- Sum total usage amount and round to 2 decimal places
ROUND(SUM(line_item_usage_amount), 2) as total_usage_amount,
-- Get the unit of measurement for usage (e.g. GB, hours)
pricing_unit as usage_unit
-- Specify the table containing the cost and usage data
FROM detailed_cost_analysis_report.detailed_cost_analysis_report
-- Filter for specific year
WHERE year = '2024'
-- Filter for specific month
AND month = '11'
-- Only include records with actual costs (greater than 0)
AND line_item_unblended_cost > 0
-- Group by service identifiers, region, currency and usage unit
GROUP BY line_item_product_code,
product_product_name,
product_region,
line_item_currency_code,
pricing_unit
-- Sort by total cost in descending order (highest first)
ORDER BY total_cost DESC;