3 Steps
Save 30-90% $/perf
CREATE EXTERNAL TABLE ....
SELECT...FROM
WHERE...GROUP BY...HAVING...ORDER BY
ALTER TABLE ...ADD PARTITION
var JDBC = require('jdbc');
var jinst = require('jdbc/lib/jinst');
// JVM options and the classpath are only applied while no JVM has been
// created yet; once jinst reports one, this setup is skipped.
if (!jinst.isJvmCreated()) {
  jinst.addOption('-Xrs');
  // The jar name contains a '*' — presumably a placeholder for the driver
  // version; confirm against the jar actually shipped.
  jinst.setupClasspath(['./AthenaJDBC41-*.jar']);
}

// Settings passed to the JDBC wrapper. The '*' in `url` and in
// `s3_staging_dir`, and the 'access_key' / 'secret_key' values, look like
// placeholders to be filled in per deployment — TODO confirm.
var config = {
  url: 'jdbc:awsathena://athena.*.amazonaws.com:443',
  drivername: 'com.amazonaws.athena.jdbc.AthenaDriver',
  minpoolsize: 10,
  maxpoolsize: 100,
  properties: {
    s3_staging_dir: 's3://aws-athena-query-results-*/',
    log_path: '/logs/athenajdbc.log',
    user: 'access_key',
    password: 'secret_key'
  }
};

// NOTE(review): despite its name, this instance is configured with the
// Athena driver above, not HSQLDB.
var hsqldb = new JDBC(config);

// Initialisation reports back through a callback; a failure is only logged.
hsqldb.initialize(function onInitialized(err) {
  if (!err) {
    return;
  }
  console.log(err);
});
import java.sql.*;
import java.util.Properties;
import com.amazonaws.athena.jdbc.AthenaDriver;
import com.amazonaws.auth.PropertiesFileCredentialsProvider;
/**
 * Connectivity check for Amazon Athena over JDBC.
 *
 * <p>Loads the Athena driver by class name, opens a connection to
 * {@link #athenaUrl}, runs {@code show tables in default} and prints the
 * {@code tab_name} column of every returned row to standard output.
 */
public class AthenaJDBCDemo {

    /** JDBC URL of the Athena endpoint this demo connects to. */
    static final String athenaUrl = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443";

    /**
     * Runs the connectivity test.
     *
     * <p>Any failure (driver class not found, connection error, query error,
     * or an error while closing a resource) is caught and its stack trace is
     * printed; nothing is rethrown, and the final "Finished" line is always
     * printed.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");

            Properties info = new Properties();
            info.put("s3_staging_dir", "s3://my-athena-result-bucket/test/");
            info.put("log_path", "/Users/myUser/.athena/athenajdbc.log");
            info.put("aws_credentials_provider_class", "com.amazonaws.auth.PropertiesFileCredentialsProvider");
            info.put("aws_credentials_provider_arguments", "/Users/myUser/.athenaCredentials");

            String databaseName = "default";

            System.out.println("Connecting to Athena...");
            // try-with-resources closes the ResultSet, Statement and Connection
            // exactly once, in reverse order, on both the success and failure path.
            try (Connection conn = DriverManager.getConnection(athenaUrl, info)) {
                System.out.println("Listing tables...");
                // databaseName is a constant defined above, not external input.
                String sql = "show tables in " + databaseName;
                try (Statement statement = conn.createStatement();
                     ResultSet rs = statement.executeQuery(sql)) {
                    while (rs.next()) {
                        String name = rs.getString("tab_name");
                        System.out.println("Name: " + name);
                    }
                }
            }
        } catch (Exception ex) {
            // Demo program: report the failure and fall through to the final message.
            ex.printStackTrace();
        }
        System.out.println("Finished connectivity test.");
    }
}
Item | AWS Athena | GCP BigQuery |
---|---|---|
Query | $ 5 / TB scanned | $ 5 / TB scanned |
Store Data | $ 20 / TB** in S3 | $ 20 / TB** in BQ |
* Loading and Exporting data is FREE
** Price per TB per month
Item | AWS Athena | GCP BigQuery |
---|---|---|
Query | ||
Input data | ||
Output data | ||
Spectrum
Best for 'ad-hoc' SQL queries of data in S3
Best for 'frequently-accessed' and 'highly-structured' data stored in S3
Use Case | AWS | GCP |
---|---|---|
Ad hoc | Athena* | BigQuery* |
Data Warehouse | Redshift | BigQuery* |
Batch Transform | EMR / Spark | Dataproc / Spark |
Streaming | Manual Process | BigQuery* |
*Serverless
Not quite yet
@LynnLangit