Tuesday, 26 June 2018

AWS - Identity Broker - Java code

package com;

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.Bucket;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
import com.amazonaws.services.securitytoken.model.AssumeRoleRequest;
import com.amazonaws.services.securitytoken.model.Credentials;
import com.amazonaws.util.StringUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class AssumeRole {

    private static BasicSessionCredentials sessionCredentials;

    public static void main(String[] args) {

        AmazonS3 s3 = getS3Client("ap-south-1","arn:aws:iam::867717:role/s3-role");

        GetObjectRequest request = new GetObjectRequest("amazonwebservicesbucket","index.html");
        S3Object fullObject = s3.getObject(request);

        try {
            displayTextInputStream(fullObject.getObjectContent());
        } catch (IOException e) {
            e.printStackTrace();
        }


        //With the temporary credentials we are able to list all bucket names.
        AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(sessionCredentials)).build();
        for (Bucket bucket : s3Client.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
    }

    public static AmazonS3 getS3Client(final String region, final String roleArn) {
        final Regions awsRegion = StringUtils.isNullOrEmpty(region) ? Regions.US_EAST_1 : Regions.fromName(region);

        if (StringUtils.isNullOrEmpty(roleArn)) {
            return AmazonS3ClientBuilder.standard().withRegion(awsRegion).build();
        } else {
            final AssumeRoleRequest assumeRole = new AssumeRoleRequest()
                    .withRoleArn(roleArn)
                    .withRoleSessionName("s3-role")
                    .withDurationSeconds(3600); // valid for 1 hour here; AssumeRole accepts 900 seconds (15 min) up to the role's maximum session duration (default 1 hour, configurable up to 12 hours).
            final AWSSecurityTokenService sts = AWSSecurityTokenServiceClientBuilder.standard().withRegion(awsRegion).build();
            final Credentials credentials = sts.assumeRole(assumeRole).getCredentials();

            sessionCredentials = new BasicSessionCredentials(
                    credentials.getAccessKeyId(),
                    credentials.getSecretAccessKey(),
                    credentials.getSessionToken());

            System.out.println("AccessKeyID :"+credentials.getAccessKeyId());
            System.out.println("SecretAccessKey :"+credentials.getSecretAccessKey());
            System.out.println("SessionToken :"+credentials.getSessionToken());
            
            return AmazonS3ClientBuilder.standard().withRegion(awsRegion).withCredentials(new AWSStaticCredentialsProvider(sessionCredentials)).build();
        }
    }

    private static void displayTextInputStream(InputStream input) throws IOException {
        // Read the text input stream one line at a time and display each line.
        BufferedReader reader = new BufferedReader(new InputStreamReader(input));
        String line = null;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
        System.out.println();
    }
}


pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>aws-sts</groupId>
    <artifactId>aws-sts</artifactId>
    <version>1.0-SNAPSHOT</version>


    <dependencies>
        <!-- https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk -->
        <dependency>
            <groupId>com.amazonaws</groupId>
            <artifactId>aws-java-sdk</artifactId>
            <version>1.11.350</version>
        </dependency>

    </dependencies>
</project>

Thursday, 14 June 2018

Redis Cache Server - Setup/Testing/Java Code

Download stable version .tar/zip from here :https://redis.io/download
Follow this link for setup/testing : https://redis.io/topics/quickstart

Some commands to setup:

cd redis-stable
make
sudo make install
cd src
redis-server

Test it:
$ redis-cli ping
PONG ==> response

$ redis-cli                                                                
redis 127.0.0.1:6379> ping
PONG
redis 127.0.0.1:6379> set mykey somevalue
OK
redis 127.0.0.1:6379> get mykey
"somevalue"

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>redis</groupId>
    <artifactId>redis</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.9.5</version>
        </dependency>

    </dependencies>

</project>

Java for Testing:

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class Redisclient {

    public static void main(String[] args) {

        //Connecting to Redis server on localhost
        Jedis jedis = new Jedis("localhost");


        //Can read jedis objects from Pool.
        /*
        JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
        JedisPool pool = new JedisPool(jedisPoolConfig,
                Constants.REDIS_IP_EUROPE,
                Constants.REDIS_PORT, Constants.REDIS_TIMEOUT);
        Jedis jedis = pool.getResource();
        jedis.auth(Constants.REDIS_PWD);
        */

        //Get all Student details from DB (this could just as well be e.g. tax calculation values).
        List<Student> list = getAllStudentsFromDB();

        ObjectMapper mapper = new ObjectMapper();
        String studentJSON = "";
        try {
            studentJSON = mapper.writeValueAsString(list);
        } catch (JsonProcessingException e) {
            e.printStackTrace();
        }

        System.out.println("JSON : "+studentJSON);

        //writing value to redis
        jedis.set("allstudents", studentJSON);

        //reading from redis
        String allstudents = jedis.get("allstudents");

        try {
            List<Student> studentList = mapper.readValue(allstudents,new TypeReference<List<Student>>(){});
            studentList.forEach(System.out::println);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static List<Student> getAllStudentsFromDB() {

        Student st =  new Student();
        st.setAge(10);
        st.setId(1);
        st.setName("kumar");
        List<String> subjects = new ArrayList<String>();
        subjects.add("maths"); subjects.add("social"); subjects.add("science");
        st.setSubjects(subjects);

        Student st1 =  new Student();
        st1.setAge(110);
        st1.setId(11);
        st1.setName("kumar1");
        List<String> subjects1 = new ArrayList<String>();
        subjects1.add("maths1"); subjects1.add("social1"); subjects1.add("science1");
        st1.setSubjects(subjects1);

        List<Student> list = new ArrayList<Student>();
        list.add(st); list.add(st1);

        return list;
    }
}
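
The Student class referenced above isn't shown in the post; a minimal sketch of the assumed POJO (field names are inferred from the setters used in getAllStudentsFromDB(), and a no-arg constructor plus getters/setters are what Jackson needs for serialization):

import java.util.List;

// Minimal sketch of the Student POJO assumed by the example above.
public class Student {

    private int id;
    private int age;
    private String name;
    private List<String> subjects;

    public Student() { }

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getAge() { return age; }
    public void setAge(int age) { this.age = age; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public List<String> getSubjects() { return subjects; }
    public void setSubjects(List<String> subjects) { this.subjects = subjects; }

    @Override
    public String toString() {
        return "Student{id=" + id + ", name='" + name + "', age=" + age + ", subjects=" + subjects + "}";
    }
}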

Schedule Jobs/Cron Jobs - Linux machines

1 crontab


The command to create/edit, list, and remove cron jobs is crontab. If you call it with the -u option, it specifies the name of the user whose crontab is to be tweaked. If this option is not given, crontab examines "your" crontab, i.e., the crontab of the person executing the command. If you are logged in as root and run crontab without -u, then root's crontab is listed/modified/removed. If you are logged in as exampleuser and run crontab without -u, then exampleuser's crontab is listed/modified/removed.

Examples:
crontab -l
lists the cron jobs of the user that you are currently logged in as:
server1:~# crontab -l
* * * * * /usr/local/ispconfig/server/server.sh > /dev/null 2>> /var/log/ispconfig/cron.log
30 00 * * * /usr/local/ispconfig/server/cron_daily.sh > /dev/null 2>> /var/log/ispconfig/cron.log
server1:~#
crontab -u exampleuser -l
lists all cron jobs of exampleuser.
crontab -e
lets you create/modify the cron jobs of the user that you are currently logged in as (I'll come to the syntax in the next chapter).
crontab -u exampleuser -e
lets you create/modify the cron jobs of exampleuser.
crontab -r
deletes all cron jobs of the user that you're currently logged in as.
crontab -u exampleuser -r
deletes all cron jobs of exampleuser.
If you have written your cron jobs to a text file, you can use the text file to create the cron jobs. For example, let's assume you have created the text file /tmp/my_cron_jobs.txt...
vi /tmp/my_cron_jobs.txt
... with the following contents:
30 00 * * * /path/to/script
You can create a cron job from that file as follows:
crontab /tmp/my_cron_jobs.txt
(Or for exampleuser:
crontab -u exampleuser /tmp/my_cron_jobs.txt
)
Please note that this will overwrite all previously created cron jobs - if you've already created some cron jobs, it's better to use crontab -e and add the new cron job manually.
See
man crontab
to learn more about the crontab command.

2 Cron Job Syntax

A cron job consists of six fields:
<minute> <hour> <day of month> <month> <day of week> <command>
              field          allowed values
              -----          --------------
              minute         0-59
              hour           0-23
              day of month   1-31
              month          1-12 (or names, see below)
              day of week    0-7 (0 or 7 is Sun, or use names)
When specifying day of week, both day 0 and day 7 will be considered Sunday.
A field may be an asterisk (*), which always stands for first-last.
Names can also be used for the "month" and "day of week" fields. Use the first three letters of the particular day or month (case doesn't matter), e.g. sun or SUN for Sunday, or mar / MAR for March.
Let's take a look at the two cron jobs from the first chapter:
* * * * * /usr/local/ispconfig/server/server.sh > /dev/null 2>> /var/log/ispconfig/cron.log
This means: execute /usr/local/ispconfig/server/server.sh > /dev/null 2>> /var/log/ispconfig/cron.log once per minute.
30 00 * * * /usr/local/ispconfig/server/cron_daily.sh > /dev/null 2>> /var/log/ispconfig/cron.log
This means: execute /usr/local/ispconfig/server/cron_daily.sh > /dev/null 2>> /var/log/ispconfig/cron.log once per day at 00:30h.
The day of a command's execution can be specified by two fields: day of month, and day of week. If both fields are restricted (i.e., aren't *), the command will be run when either field matches the current time. For example, 30 4 1,15 * 5 would cause a command to be run at 4:30h on the 1st and 15th of each month, plus every Friday.
You can use ranges to define cron jobs:
Examples:
1,2,5,9 - means every first, second, fifth, and ninth (minute, hour, month, ...).
0-4,8-12 - means all (minutes, hours, months,...) from 0 to 4 and from 8 to 12.
*/5 - means every fifth (minute, hour, month, ...).
1-9/2 is the same as 1,3,5,7,9.
Ranges or lists of names are not allowed (if you are using names instead of numbers for months and days - e.g., Mon-Wed is not valid).
1,7,25,47 */2 * * * command
means: run command at minutes 1, 7, 25, and 47 of every second hour.
Instead of the first five fields, one of eight special strings may appear:
              string         meaning
              ------         -------
              @reboot        Run once, at startup.
              @yearly        Run once a year, "0 0 1 1 *".
              @annually      (same as @yearly)
              @monthly       Run once a month, "0 0 1 * *".
              @weekly        Run once a week, "0 0 * * 0".
              @daily         Run once a day, "0 0 * * *".
              @midnight      (same as @daily)
              @hourly        Run once an hour, "0 * * * *".
You can also use name=value pairs in a crontab to define variables for the cron jobs:
# use /bin/bash to run commands, instead of the default /bin/sh
SHELL=/bin/bash
# mail any output to exampleuser, no matter whose crontab this is
MAILTO=exampleuser
# set the PATH variable to make sure all commands in the crontab are found
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

* * * * * my_command
Please note: unless you set a PATH variable in a crontab, always use full paths in the crontab to make sure commands are found and can be executed. For example, instead of writing rsync, you should write /usr/bin/rsync. Use which to find out the full path of a program:
which rsync
server1:~# which rsync
/usr/bin/rsync
server1:~#
See
man 5 crontab
to learn more about the cron job syntax.

Load Balancer / Cluster - Algorithms for balancing

A computer cluster is a single logical unit consisting of multiple computers that are linked through a LAN. The networked computers essentially act as a single, much more powerful machine. A computer cluster provides much faster processing speed, larger storage capacity, better data integrity, superior reliability and wider availability of resources.

Computer clusters are, however, much more costly to implement and maintain. This results in much higher running overhead compared to a single computer.

Many organizations use computer clusters to maximize processing time, increase database storage and implement faster data storing & retrieving techniques.
There are many types of computer clusters, including:
  • Load-balancing clusters
  • High availability (HA) clusters
  • High performance (HP) clusters
The major advantages of using computer clusters are clear when an organization requires large scale processing. When used this way, computer clusters offer:

  • Cost efficiency: The cluster technique is cost effective for the amount of power and processing speed being produced. It is more efficient and much cheaper compared to other solutions like setting up mainframe computers.
  • Processing speed: Multiple high-speed computers work together to provide unified processing, and thus faster processing overall.
  • Improved network infrastructure: Different LAN topologies are implemented to form a computer cluster. These networks create a highly efficient and effective infrastructure that prevents bottlenecks.
  • Flexibility: Unlike mainframe computers, computer clusters can be upgraded to enhance the existing specifications or add extra components to the system.
  • High availability of resources: If any single component fails in a computer cluster, the other machines continue to provide uninterrupted processing. This redundancy is lacking in mainframe systems.

The balancing algorithm is one of the most important factors in this context; three basic methods are explained below:
Least Connections
This technique directs each request to the server with the fewest active requests/connections. For example, if server 1 is currently handling 50 requests/connections and server 2 is handling 25 requests/connections, the next request/connection is automatically directed to server 2, since that server currently has fewer active requests/connections.
Round Robin
This method always directs requests to the next available server in a circular fashion. For example, incoming connections are directed to server 1, then server 2, then server 3, and then back to server 1.
Weighted Fair
This technique distributes requests according to the load on each server and its responsiveness (performance). For example, if server 1 is four times faster at servicing requests than server 2, the administrator assigns a proportionally larger share of the work to server 1 than to server 2.
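
As a small illustration of the simplest of these methods, here is a round-robin selector sketched in Java (the server names and list are invented for the example; a real load balancer also tracks health checks and connection counts):

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

// Minimal round-robin balancer sketch: each call to next() returns the
// following server in the list, wrapping around at the end.
public class RoundRobinBalancer {

    private final List<String> servers;
    private final AtomicInteger counter = new AtomicInteger(0);

    public RoundRobinBalancer(List<String> servers) {
        this.servers = servers;
    }

    public String next() {
        // floorMod keeps the index positive even if the counter overflows
        int index = Math.floorMod(counter.getAndIncrement(), servers.size());
        return servers.get(index);
    }

    public static void main(String[] args) {
        RoundRobinBalancer lb = new RoundRobinBalancer(
                Arrays.asList("server1", "server2", "server3"));
        for (int i = 0; i < 6; i++) {
            System.out.println("request " + i + " -> " + lb.next());
        }
    }
}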
Refer:
https://www.esds.co.in/blog/cluster-computing-definition-and-architecture-of-a-cluster/#sthash.SEcCghTH.dpbs

Tuesday, 12 June 2018

AWS - Private EC2 - Bastion setup on MAC

This is the most secure way of connecting to private EC2 instances from a host computer. Even if the bastion host (public EC2) is compromised/hacked, nobody can access the private EC2 instances, because the .pem key files are stored only on the host computer.

1)Create a VPC
2)Create a subnet
3)Subnets =>"Subnet Actions" dropdown menu ==> 'Modify auto assign IP settings'
4)Create Internet Gateway to make Subnet internet accessible.
5)Attach Internet Gateway to VPC.
6)IMPORTANT : Add an entry in VPC ==> 'Route Tables' section ==> 'Routes' tab ==> Destination (0.0.0.0/0) and Target (new Internet Gateway) ==> This step exposes the public EC2 instance to the Internet and also lets the private EC2 reach the Internet; otherwise the public EC2 instance can't be accessed from the outside world.
7)Create 2 Security Groups - 1 for the public EC2 and 1 for the private EC2. For the private EC2 security group ==> in Inbound Rules ==> make sure the Source value refers to the public EC2 security group. This ensures the public EC2 bastion host acts as a mediator between the private EC2 and the outside world.
8)Create the EC2 instance; while creating the instance make sure you select the appropriate subnet, and select the 'Disable' option for 'Auto-assign Public IP' if you want to create a private EC2 instance. If you want a public IP (EC2 instance), leave it at the default value (Use subnet settings (Enable)).
9) While creating the private EC2 instance, make sure you refer to the appropriate 'Security Group'.
10)Connect to Public EC2 using this command:

ssh ec2-user@<publicIP> -i <pem key>

To verify whether it's connected to the Internet, run this command (it updates the Linux software):

yum update -y

or to Install Apache : yum install httpd -y

11) To connect to the private EC2 using the bastion host (public EC2), use the settings below in your SSH config:

HOST bastion
  IdentityFile ~/.ssh/pem/mylinuxkey.pem
  User ec2-user
  Hostname 35.168.23.91

HOST 10.*
  user ec2-user
  IdentityFile ~/.ssh/pem/mylinuxkey.pem
  ProxyCommand ssh bastion -W %h:%p

Save these steps in ~/.ssh/config

And run the command as below:

ssh 10.0.1.123

==> This connects to the private EC2 via the ProxyCommand, using the bastion (public EC2) details.

12) To connect to S3 from the private EC2, create an Endpoint in the VPC section. Select Service name: com.amazonaws.us-east-1.s3 and select the appropriate VPC. This configuration adds an entry to the Route Tables of the VPC configuration.
13) Once you log in to the private EC2, run the command below to list a bucket's files.

aws s3 ls s3://usbastiontesting --recursive

***NOTE : Make sure S3 and the VPC are in the same region, otherwise we can't fetch files from buckets in other regions. Currently cross-region access is not supported by endpoints.



Monday, 11 June 2018

AWS - IAAS / PAAS / SAAS

AWS Cloud Levels:


Shared Responsibility Model for Infrastructure Services (IAAS)

Infrastructure services, such as Amazon EC2, Amazon EBS, and Amazon VPC, run on top of the AWS global infrastructure.



Shared Responsibility Model for Container Services (PAAS)

The AWS shared responsibility model also applies to container services, such as Amazon RDS and Amazon EMR. For these services, AWS manages the underlying infrastructure and foundation services, the operating system and the application platform. For example, Amazon RDS for Oracle is a managed database service in which AWS manages all the layers of the container, up to and including the Oracle database platform. For services such as Amazon RDS, the AWS platform provides data backup and recovery tools; but it is your responsibility to configure and use tools in relation to your business continuity and disaster recovery (BC/DR) policy.




Shared Responsibility Model for Abstracted Services  (SAAS)

For abstracted services, such as Amazon S3 and Amazon DynamoDB, AWS operates the infrastructure layer, the operating system, and platforms and you access the endpoints to store and retrieve data. Amazon S3 and DynamoDB are tightly integrated with IAM. You are responsible for managing your data (including classifying your assets), and for using IAM tools to apply ACL-type permissions to individual resources at the platform level, or permissions based on user identity or user responsibility at the IAM user/group level. For some services, such as Amazon S3, you can also use platform-provided encryption of data at rest, or platform-provided HTTPS encapsulation for your payloads for protecting your data in transit to and from the service.




AWS exam experience

big focus on DynamoDB (~15/55 questions) and S3 (~12/55 questions)

You should know about:

Optimising S3 Key (File) names for heavy load (http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html)

Optimising DynamoDB operations (http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/QueryAndScanGuidelines.html)

ELB and session cookies (http://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-sticky-sessions.html)

CORS

Resources that Elastic Beanstalk can create

CloudFormation Output section and Function you would use

SNS message format

Not covered in the course:


Options on authenticating using LDAP together with IAM.


============================================================

 https://acloud.guru/forums/aws-certified-developer-associate/discussion/-KUdI5f2LNbi4wvK7v4I/How%20to%20PASS%20AWS%20Certified%20Developer%20Exam


http://169.254.169.254/latest/meta-data/

SDK -

IOS, Android, Browser (Java scripts)

Java, .NET,

Node.js, PHP, Python, Ruby

Go, C++

SQS - message oriented API

SQS - A message can contain up to 256 KB of text, billed in 64 KB chunks.

A single request can have 1 to 10 messages, up to a maximum of 256 KB total payload.

Even if there is just one message of 256 KB, it is billed as 4 requests, since 256 KB = 4 * 64 KB.

NO ORDER - SQS messages can be delivered multiple times in any order

Design - you can have 2 queues for priority-based messaging, one for higher and one for lower priority.

EC2 instances always poll for messages from the queue (pull from the queue and not push)

The visibility timeout always starts from when the application instance polled the message.

Great design - if the visibility timeout expires, that means there was a failure somewhere, since the message was polled but not processed (and hence not deleted); some other process will then poll the message again and the visibility timeout starts again.

The visibility timeout is 30 seconds by default, up to a 12-hour maximum (ChangeMessageVisibility).

Maximum long polling timeout 20 seconds (http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-long-polling.html) —ReceiveMessageWaitTimeSeconds

Messages in the Queue can be retained for up to 14 days

The first 1 million requests are free, then $0.50 per million requests.
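
A minimal sketch of these SQS operations with the AWS SDK for Java used elsewhere in this post (queue name, region and timeout values are placeholders, not from the notes above):

import com.amazonaws.services.sqs.AmazonSQS;
import com.amazonaws.services.sqs.AmazonSQSClientBuilder;
import com.amazonaws.services.sqs.model.Message;
import com.amazonaws.services.sqs.model.ReceiveMessageRequest;

// Sketch only: send, long-poll receive, extend visibility, then delete.
public class SqsDemo {
    public static void main(String[] args) {
        AmazonSQS sqs = AmazonSQSClientBuilder.standard().withRegion("ap-south-1").build();
        String queueUrl = sqs.createQueue("demo-queue").getQueueUrl(); // placeholder queue name

        sqs.sendMessage(queueUrl, "hello from SQS");

        // Long polling: wait up to 20 seconds for a message instead of returning empty immediately
        ReceiveMessageRequest receive = new ReceiveMessageRequest(queueUrl)
                .withWaitTimeSeconds(20)
                .withMaxNumberOfMessages(10);

        for (Message m : sqs.receiveMessage(receive).getMessages()) {
            System.out.println("Received: " + m.getBody());
            // Extend the visibility timeout to 60 seconds while processing
            sqs.changeMessageVisibility(queueUrl, m.getReceiptHandle(), 60);
            // Delete the message once processed, otherwise it becomes visible again
            sqs.deleteMessage(queueUrl, m.getReceiptHandle());
        }
    }
}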

SNS

works on a publish-subscribe model; SNS pushes the message to subscribers, hence a push-based approach. Inexpensive, pay as you go.

CloudWatch or Autoscaling triggers SNS

SNS can notify to Email, Text / SMS, SQS or any HTTP end point.

protocols: HTTP, HTTPS, EMAIL, EMAIL-JSON, SQS or Application - messages can be customized for each protocol

SNS messages are stored redundantly to multiple AZs

SNS Dataformat - JSON (Subject, Message, TopicArn, MessageId, unsubscribeURL etc..)

$0.50 per 1 million SNS request

Different price for different recipient types

to HTTP: $0.06 / 100,000 notifications deliveries

to EMAIL: $2 / 100,000 notifications deliveries

to SMS: $0.75 / 100 notifications deliveries

http://docs.aws.amazon.com/sns/latest/dg/mobile-push-send-devicetoken.html (CreatePlatformEndpoint API)
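
A minimal sketch of publishing to an SNS topic with the Java SDK (the topic ARN and region are placeholders):

import com.amazonaws.services.sns.AmazonSNS;
import com.amazonaws.services.sns.AmazonSNSClientBuilder;
import com.amazonaws.services.sns.model.PublishRequest;
import com.amazonaws.services.sns.model.PublishResult;

// Sketch only: publish a message to an SNS topic; every confirmed
// subscriber (email, SMS, SQS, HTTP endpoint, ...) receives it.
public class SnsPublishDemo {
    public static void main(String[] args) {
        AmazonSNS sns = AmazonSNSClientBuilder.standard().withRegion("ap-south-1").build();

        PublishRequest request = new PublishRequest()
                .withTopicArn("arn:aws:sns:ap-south-1:123456789012:demo-topic") // placeholder ARN
                .withSubject("Demo")
                .withMessage("hello from SNS");

        PublishResult result = sns.publish(request);
        System.out.println("MessageId: " + result.getMessageId());
    }
}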

SWF - task oriented API

Simple Workflow - supports human interaction to complete an order, or a collection of services to complete a work order.

Workers - interact with SWF to get task, process received task and return the result

Deciders - program that co-ordinates the tasks, i.e. - ordering, concurrency and scheduling

Workers and Deciders can run independently

TASK is only assigned ONCE and NEVER DUPLICATED (key difference from SQS where messages can be processed multiple times)

SWF Domain - think of it as a container for the workflow. You can register a domain via the Console or the API.

The maximum workflow processing time can be 1 year (expressed in seconds) - SQS is limited to a 12-hour visibility timeout.

CloudFormation

Use of CFT, Beanstalk and Auto Scaling is free, but you pay for the AWS resources that these services create.

Fn::GetAtt - returns the value of an attribute from a resource created by the template; commonly used to display values in the Outputs section.

By Default - rollback everything on error

Infrastructure as a code, Version controlled, declarative and flexible

ElasticBeanstalk

It uses ASG, ELB, EC2, RDS, SNS and S3 to provision things.

Environment Tier - Webserver, Worker

Predefined Configurations - IIS, Node.JS, PHP, Python, Ruby, Tomcat, Go, .NET,

preconfigured docker: Glassfish, Python or generic docker

Environment URL - has to be unique

Dashboard - Recent events, Monitor, Logs, Alarms, Upload and Deploy and Configurations

Configuration - Scaling, Instances (DIRTMCG instance types, key pair), Notifications, Software configuration (e.g. PHP.ini), Networking tier (ELB, VPC config), Data tier(RDS)

Environment properties (Access key and secret key as parameters)

DynamoDB

fast, flexible NoSQL database - single-digit ms latency, fully managed, supports document and key-value models (web, gaming, ad-tech, IoT).

Table, Item (row), attribute (key - value)

Eventual Consistent Reads vs Strongly Consistent Reads

Read Capacity Units, Write Capacity Units (can be scaled up) - push button scalability

Writes are written to 3 different locations/facilities/data centers (synchronously) - Amazon DynamoDB synchronously replicates data across three facilities in an AWS Region, giving you high availability and data durability.

Two types of primary key -

(1) Single Attribute (think unique id) - Partition Key (Hash Key) composed of 1 attribute (no nesting allowed here) - the partition key helps determine the physical location of the data.

(2) Composite key (think unique id and range) - Partition Key (Hash Key) & Sort Key (Range Key - e.g. date) - composed of 2 attributes - if two items have the same partition key (same location) they must have different sort keys, and they will be stored together in a single location.

Secondary Indexes

(1) Local Secondary Index - Same Partition Key + Different Sort Key (can only be created while creating the table; cannot be added/removed or modified later)

(2) Global Secondary Index - Different Partition Key + Different Sort Key ( can be created during the table creation or can be added later or removed / modified later)

DynamoDB Streams

used to capture any kind of modification to the DynamoDB table; Lambda can capture the events and push notifications through SES.

A table can be exported to CSV (either selected items or all items).

Query vs Scan

The Query operation finds items in a table using only primary key attribute values; you must provide the partition key attribute name and the value to search for, and you can optionally provide a sort key attribute name and value to refine the search results (e.g. all the forums with this ID in the last 7 days). By default, Query returns all the data attributes for the items with the specified primary keys. You can use the ProjectionExpression parameter to return only selected attributes.

Query results are always sorted by the sort key (ascending for both numbers and string by default). To reverse the sort order set the ScanIndexForward parameter to false

By default, queries are eventually consistent but can be changed to strongly consistent.

The Scan operation basically examines every item - e.g. dumping the entire table. By default Scan returns all the data attributes, but you can use the ProjectionExpression parameter to return only selected attributes.

Query operation is more efficient than scan operation

For quick response times, design your table so that you can use the Query, GetItem or BatchGetItem APIs (BatchGetItem reads multiple items - up to 100 items or up to 1 MB of data).

Alternatively, design your application to use the Scan operation in a way that minimizes the impact on your table's request rate, since a single Scan operation can use up the entire table's provisioned throughput.
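
A sketch of such a Query using the DynamoDB document API in Java (table name, key schema and values are invented for the example):

import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;

// Sketch only: query a table by partition key, return a few attributes,
// and reverse the sort-key order with ScanIndexForward = false.
public class DynamoQueryDemo {
    public static void main(String[] args) {
        AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withRegion("ap-south-1").build();
        Table table = new DynamoDB(client).getTable("Forum"); // assumed table name

        QuerySpec spec = new QuerySpec()
                .withKeyConditionExpression("ForumId = :id and PostDate > :since") // assumed key schema
                .withValueMap(new ValueMap()
                        .withString(":id", "101")
                        .withString(":since", "2018-06-01"))
                .withProjectionExpression("Subject, PostDate") // return only selected attributes
                .withScanIndexForward(false)                   // newest first
                .withConsistentRead(true);                     // strongly consistent read

        for (Item item : table.query(spec)) {
            System.out.println(item.toJSONPretty());
        }
    }
}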

DynamoDB Provisioned Throughput calculations

Items == rows

Read Provisioned Throughput

All units are rounded up to 4KB increments

Eventually consistent reads (the default) give 2 reads per second per read capacity unit

Strongly consistent reads give 1 read per second per read capacity unit

(Size of read rounded up to the nearest 4 KB / 4 KB) * no. of items / 2 <— if eventually consistent

(Size of read rounded up to the nearest 4 KB / 4 KB) * no. of items / 1 <— if strongly consistent

Write Provisioned Throughput

All units are rounded up to 1KB increments

Each write capacity unit gives 1 write per second

( Size of write in KB * no of items ) / 1
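
A quick worked example (item sizes and counts here are made up for illustration): to read 10 items of 6 KB each per second, 6 KB rounds up to 8 KB = 2 units of 4 KB per item, 2 * 10 = 20, divided by 2 for eventually consistent reads = 10 read capacity units (20 if strongly consistent). To write 10 items of 1.5 KB each per second, 1.5 KB rounds up to 2 KB = 2 units per item, so 2 * 10 = 20 write capacity units.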

When you exceed your maximum allowed provisioned throughput for a table or one or more global secondary index you will get 400 HTTP Status code - ProvisionedThroughputExceededException

AssumeRoleWithWebIdentity role

Idempotent conditional write

Atomic counters - always increment on every call, so they are not idempotent

If the data is critical and there is no margin for error, you must use an idempotent conditional write.
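
A sketch of both patterns with the DynamoDB document API (table, attribute names and values are invented for the example):

import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.document.spec.UpdateItemSpec;
import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;
import com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException;

// Sketch only: atomic counter vs idempotent conditional write.
public class ConditionalWriteDemo {
    public static void main(String[] args) {
        Table table = new DynamoDB(
                AmazonDynamoDBClientBuilder.standard().withRegion("ap-south-1").build())
                .getTable("Orders"); // assumed table name

        // Atomic counter: unconditionally adds 1 on every call, so retries are NOT idempotent
        table.updateItem(new UpdateItemSpec()
                .withPrimaryKey("OrderId", "1001")
                .withUpdateExpression("ADD Visits :inc")
                .withValueMap(new ValueMap().withNumber(":inc", 1)));

        // Conditional (idempotent) write: only succeeds if the item is still in the expected state
        try {
            table.updateItem(new UpdateItemSpec()
                    .withPrimaryKey("OrderId", "1001")
                    .withUpdateExpression("SET OrderStatus = :new")
                    .withConditionExpression("OrderStatus = :old")
                    .withValueMap(new ValueMap()
                            .withString(":new", "SHIPPED")
                            .withString(":old", "PACKED")));
        } catch (ConditionalCheckFailedException e) {
            System.out.println("Already updated - retrying is a no-op, which is the point");
        }
    }
}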

http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-tables

Only the Tables (256 tables per region) and ProvisionedThroughput (80K read, 80K write per account for US East; 20K for other regions) limits can be increased.

http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/QueryAndScanGuidelines.html (Reduce Page Size for Scan operation and Isolate Scan Operation)

S3

secure, durable, highly scalable object store (1 byte to 5 TB), universal namespace (bucket names must be unique regardless of region), object-based key-value store, Version ID, Metadata, ACL

The total volume of data and number of objects you can store are unlimited. Individual Amazon S3 objects can range in size from 1 byte to 5 terabytes. The largest object that can be uploaded in a single PUT is 5 gigabytes. For objects larger than 100 megabytes, customers should consider using the Multipart Upload capability. This means the largest single PUT to S3 is 5 GB, but once the parts are in S3, they can be assembled into a file of up to 5 TB.

You can use a Multipart Upload for objects from 5 MB to 5 TB in size (exam question: a scenario where a file larger than 5 GB needs to be uploaded).
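
A minimal sketch of a multipart upload using the SDK's TransferManager, which splits the file into parts for you (bucket, key and file path are placeholders):

import java.io.File;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
import com.amazonaws.services.s3.transfer.Upload;

// Sketch only: TransferManager uploads large files in parallel parts.
public class MultipartUploadDemo {
    public static void main(String[] args) throws InterruptedException {
        AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion("ap-south-1").build();
        TransferManager tm = TransferManagerBuilder.standard().withS3Client(s3).build();

        Upload upload = tm.upload("amazonwebservicesbucket", "backups/big-file.zip",
                new File("/tmp/big-file.zip")); // placeholder paths
        upload.waitForCompletion(); // blocks until all parts are uploaded
        System.out.println("Upload complete");

        tm.shutdownNow(false); // keep the underlying S3 client open
    }
}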

object based storage vs block based Storage (EFS)

data is spread out in multiple facilities; you can lose two facilities and still have access to files

For PUTS of New Objects (Read after Write Consistency), For Overwrite PUTS and DELETE (Eventual Consistency)

http://docs.aws.amazon.com/general/latest/gr/awsservicelimits.html#limits_s3 ( Number of S3 bucket limit per account — 100)

Storage Tiers/ Class

S3 Standard - Durability (11 9s), Availability (99.99 %) - reliable regular for just about everything

S3 IA (Infrequent Access) - Durability (11 9s), Availability (99.9 %) - accessed every 1 month to 6 months or so (infrequent) but rapid access and low retrieval time (few ms)

S3 RRS (Reduced Redundancy Storage) - Durability (99.99%), Availability (99.99%) - lower durability (for data that can easily be regenerated, e.g. thumbnails) - cheapest of all S3 classes, less fault tolerant than the other two since you are willing to lose the data (reproducible data)

Glacier - for archival only (3 to 5 hours restore time)

S3 price - charged for Storage, number of requests, data transfer (tiered so more you use less charge)

bucket name has to be all lowercase letters

S3 for static website hosting (Static Website Hosting > Enable website hosting) - no dynamic

Any time you create a bucket nothing is publicly accessible / Any time you add an object to a bucket it's private by default (you will get 403) > Make the files public (even for public hosting)

every object inside the bucket can have different storage class (S3 standard, S3-IA, S3-RRS) and you can turn on server side encryption (AES - 256)

regular bucket link: https://s3-eu-west-1.amazonaws.com/ankittest <— https

bucket with Static website hosting: http://ankittestsite.s3-website-eu-west-1.amazonaws.com <— http (has to be for static hosting), you can turn it into SSL / https with cloudfront though

CORS (CROSS ORIGIN RESOURCE SHARING) - to avoid the use of proxy

Versioning - once enabled you cannot disable versioning, although it can be suspended; if you want to turn it off, delete the bucket and recreate it (version ID)

Once you delete the delete marker, you get back the file that you deleted while versioning was on

Every version is stored separately in the bucket, so from a cost perspective versioning might not be a good choice for large media files; use cases with many updates are also not ideal for versioning.

Versioning's MFA Delete capability can be used to provide an additional layer of security.

Cross Region Replication - (requires versioning enabled on source and destination buckets)

To enable it you need a source and a destination bucket (create a new bucket; the source bucket will not show up in the destination dropdown)

Existing objects will not be replicated, only new objects will be replicated across the region

Lifecycle management in S3

(1) when versioning is disabled

Transition to S3-IA - minimum 30 days, and a 128 KB minimum object size

Archive to Glacier - minimum 1 day if Transition to S3-IA is not checked; minimum 60 days if Transition to S3-IA is checked

Permanently Delete - minimum 2 days if IA is not checked and 1 day is selected for Glacier; minimum 61 days if IA is set to 30 and Glacier to 60

(2) when versioning is enabled you have lifecycle management options to take action on previous version as well as current version.

Security and Encryption in S3

by default newly created buckets are private

Access control using Bucket Policies (entire bucket) and ACL(individual objects and folders)

access logs - log all the requests made to an S3 bucket, into another bucket or another account's S3 bucket

Encryption

(1) In Transit - SSL / TLS

(2) Data at rest

Server Side Encryption

SSE-S3 - Server Side Encryption with S3-managed keys (Amazon handles AES-256 for you) - click on the object and encrypt

SSE-KMS - AWS Key Management Service managed keys - additional charges, audit trail of key usage, Amazon manages the keys

SSE-C - Server side encryption with customer-provided keys - you manage the encryption keys

Client Side Encryption

you encrypt the data on client side and upload to s3
Every non-anonymous request to S3 must contain authentication information to establish the identity of the principal making the request. In REST, this is done by first putting the headers in a canonical format, then signing the headers using your AWS Secret Access Key.

You can insert a presigned url into a webpage to download private data directly from S3.
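
A minimal sketch of generating such a pre-signed URL with the Java SDK (bucket, key and region are placeholders; the link below expires after one hour):

import java.util.Date;

import com.amazonaws.HttpMethod;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;

// Sketch only: the returned URL lets anyone who has it GET the private
// object until the expiration time, without needing AWS credentials.
public class PresignedUrlDemo {
    public static void main(String[] args) {
        AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion("ap-south-1").build();

        Date expiration = new Date(System.currentTimeMillis() + 60 * 60 * 1000); // 1 hour from now

        GeneratePresignedUrlRequest request =
                new GeneratePresignedUrlRequest("amazonwebservicesbucket", "index.html") // placeholders
                        .withMethod(HttpMethod.GET)
                        .withExpiration(expiration);

        System.out.println("Pre-signed URL: " + s3.generatePresignedUrl(request));
    }
}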

The object creation REST APIs (see Specifying Server-Side Encryption Using the REST API) provide a request header, x-amz-server-side-encryption that you can use to request server-side encryption.

S3 Transfer Acceleration

Utilize local edge locations to upload content to S3 - incur extra cost

The further away you are, the more benefit you get (faster uploads)

Storage Gateway

(1) Gateway stored volumes - entire dataset is stored onsite and asynchronously backed up to S3

(2) Gateway cached volumes - Most frequently used data is stored onsite and entire dataset is stored on S3

(3) Gateway Virtual Tape Library - used for backup if you don't want to use physical tapes, e.g. with NetBackup etc.

Import Export

Import / Export Disk

Import to S3, EBS, Glacier

export from S3

Import / Export Snowball

Import to S3

Export to S3

S3 stores data in alphabetical/lexicographical key order, so if you want to spread the load across S3, key names should not share a similar prefix (optimize performance).
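
For example (illustrative key names only): instead of uploading logs/2018/06/26/0001.log, logs/2018/06/26/0002.log, and so on, add a short random hash prefix such as 4e2f-logs/2018/06/26/0001.log and 9a1c-logs/2018/06/26/0002.log so that sequential uploads are spread across different S3 partitions.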

CloudFront

Content Delivery Network - edge locations, reduced latency, traffic serves from the closest nodes

Edge locations - where content is cached (over 50 of them), distinct from regions/AZs. TTL controls caching (images/media are served faster; the first user takes the performance hit). Edge locations are not read-only (you can also write to them).

Origin can be S3, EC2, ELB, Route 53, and also a non-AWS origin server.

Distribution - name given to the CDN consist of collection of Edge locations

(1) Web Distribution -

(2) RTMP (media streaming / flash) Distribution - for Adobe flash files only

you can have multiple origins of a distribution

Path Pattern (*)

Restrict viewer access by signed URL or Signed Cookies

Restrict content based on geo location (whitelist and blacklist)

Create Invalidation - invalidates cached objects before the TTL expires (you pay for it), like a purge in Akamai

VPC - logical datacenters in AWS

Can span multiple AZs but can't span multiple regions; VPCs can be peered, but there is no transitive peering

A custom VPC CIDR can be at most /16; you can't go larger than that (/8 is not allowed)

When you create a custom VPC it creates a default security group, a default network ACL and a default route table; it doesn't create a default subnet

One subnet == one AZ; security groups can span multiple AZs, and ACLs span across AZs (you can assign an SG and an ACL to two different subnets)

In any subnet CIDR block, 5 IPs are reserved (.0, .1, .2, .3, .255)

so for a /24 CIDR block: 2^8 - 5 = 256 - 5 = 251 available IP addresses

When you create an Internet gateway it is detached by default; attach it to the VPC. Only 1 IGW per VPC.

When you create a VPC, a default route table (the main route table) is created, which holds the default routes:

10.0.0.0/16 Local <— all subnets inside VPC will be able to talk to each other

Don’t touch Main route table

Create another routetable for route out to internet (0.0.0.0/0 IGW) <— route out to the internet

Lastly, associate this new route table with one of the subnets, which makes it public (you can enable auto-assign public IP for the public subnet)

1 subnet can have 1 routetable

ICMP is for ping / monitor

NAT instance and NAT gateway

NAT Instance - disable the source/destination check; it always sits behind a security group, must be in a public subnet, must have an EIP, and there must be a route out of the private subnet to the NAT

Increase the instance size if bottleneck

Change the main route table - add a route (0.0.0.0/0 NAT Instance target)

A NAT instance is a single point of failure (put it behind an ASG),

NAT gateway - released in 2016 - amazon handled

Amazon maintains it for you, no need to handle yourself. (security patches applied by AWS)

You can just create the gateway and assign EIP (put it in public subnet) (automatically assigned)

Change the main route table - add a route (0.0.0.0/0 NAT gateway target)

No need for disable source/destination check or no need to put it behind a security group - it handles it for you.

Highly available/redundant, so no need for an ASG. NAT gateways are a little more costly - always use them in production; they scale automatically up to 10 Gbps

ACL vs SG

Security groups are stateful - traffic allowed by an inbound rule is automatically allowed back out (allow rules only)

by default all inbound deny, all outbound allow

can span across AZ

ACLs are stateless -

For default ACL, all inbound and outbound rules are allowed by default - associated with all subnets in VPC by default

for Custom ACL, all inbound and outbound traffic is denied by default - not associated with any subnet

A subnet can be associated with only one ACL. ACLs give granular, numbered rules (steps of 100 are recommended)

Rule no. 99 takes precedence over rule no. 100: if 99 denies and 100 allows, 99 wins.

Can SPAN across AZ

Ephemeral ports - 1024-65535 should be allowed for return traffic.

If you want to BLOCK an IP address you must use an ACL, because security groups don't have deny rules

Bastion - keep it in a public subnet to allow SSH/RDP into instances in private subnets (high availability: bastions in two public subnets plus an ASG, with Route 53 running health checks on those bastions)

VPC Flow Logs: capture all traffic information into logs - logs everything (create an IAM role and a CloudWatch log group and log stream)

VPC Cleanup: can’t delete VPC if you have active running instance or ELB is running