load required libraries

# require library dplyr to mutate, filter data on 2007/02/01 and 2007/02/02
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# require lubridate to get year, month, mday to filter data
library(lubridate)

set workspace and define data set filename

# Working directory containing the 5 R scripts and the data set file.
# NOTE(review): absolute, machine-specific path - adjust it to your local checkout.
setwd("/Users/hadoop/ExData_Plotting1")

# Data set filename; per the note above, the file sits in the working directory.
fileName <- "household_power_consumption.txt"

CourseProject1.R:

  1. predict required memory to read the whole file “household_power_consumption.txt”
  2. getData function:
    1. for performance reasons, skip the first 63000 rows and read only 20000 rows
    2. mutate the data set to create weekday, year, month, mday, hour, minute, second
    3. filter the data set from 2007/02/01 to 2007/02/03 (to make the plot and axis)
  3. setXAxis function
    1. filter rows whose datetime is 00:00:00 and make the axis by weekday
  4. plotEnergySubMetering function
    1. plot Sub_metering_1 vs. datetime
    2. add a line for Sub_metering_2 vs. datetime
    3. add a line for Sub_metering_3 vs. datetime
    4. create legend
# Define the functions used for plotting (getData, setXAxis,
# plotEnergySubMetering). Sourcing this script also prints the memory
# estimates for reading the whole data file.
source("CourseProject1.R")
## [1] "require memory [column * rows * 8 / 1024^2 ](MB) = 142.496795654297 MB"
## [1] "predict memory[read 1000 rows to get object size first]: 256.937555541992 MB"

plot1: histogram of global active power and save to png

# plot1: histogram of global active power, saved to PNG
source("plot1.R")

plot2: xyplot of global active power v.s. datetime and save to png

# plot2: global active power vs. datetime, saved to PNG
source("plot2.R")

plot3: xyplot of energy sub-metering vs. datetime and save to png

# plot3: energy sub-metering vs. datetime, saved to PNG
source("plot3.R")

plot4: create 4 plots in one plot and save to png

# plot4: four plots combined in one figure, saved to PNG
source("plot4.R")