Load the required libraries
# require library dplyr to mutate, filter data on 2007/02/01 and 2007/02/02
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# require lubridate to get year, month, mday to filter data
library(lubridate)
Set the working directory and define the data set filename
# Working directory: the folder that holds the five R scripts and the data set file.
# NOTE(review): this is a machine-specific absolute path; adjust it before running elsewhere.
setwd("/Users/hadoop/ExData_Plotting1")
# Data set filename (a bare name, so presumably resolved against the working
# directory set above -- confirm in the sourced scripts).
fileName <- "household_power_consumption.txt"
CourseProject1.R:
- predict the memory required to read the whole file “household_power_consumption.txt”
- getData function:
  - for performance, skip the first 63000 rows and read only 20000 rows
  - mutate the data set to create weekday, year, month, mday, hour, minute, second
  - filter the data set from 2007/02/01 to 2007/02/03 (to make the plot and axis)
- setXAxis function:
  - filter datetime at 00:00:00 and make the axis by weekday
- plotEnergySubMetering function:
  - plot Sub_metering_1 vs. datetime
  - add a line for Sub_metering_2 vs. datetime
  - add a line for Sub_metering_3 vs. datetime
  - create the legend
# Define the functions used for plotting by evaluating CourseProject1.R
# in the current session.
source("CourseProject1.R")
## [1] "require memory [column * rows * 8 / 1024^2 ](MB) = 142.496795654297 MB"
## [1] "predict memory[read 1000 rows to get object size first]: 256.937555541992 MB"
plot1: draw a histogram of global active power and save it to PNG
# plot1: run plot1.R (per this write-up, the global active power histogram saved as a PNG)
source("plot1.R")

plot2: draw an x-y plot of global active power vs. datetime and save it to PNG
# plot2: run plot2.R (per this write-up, global active power vs. datetime saved as a PNG)
source("plot2.R")

plot3: draw an x-y plot of energy sub-metering vs. datetime and save it to PNG
# plot3: run plot3.R (per this write-up, energy sub-metering vs. datetime saved as a PNG)
source("plot3.R")

plot4: combine four plots into one figure and save it to PNG
# plot4: run plot4.R (per this write-up, four plots combined in one figure saved as a PNG)
source("plot4.R")
