Before getting started:
Learning sed
and AWK
is almost like learning a programming language.
I'm providing you only with basic commands, which can help you in most situations.
For more complex scenarios, refer to the manual, or the links at the end of this presentation.
Stands for Stream EDitor
Allows transforming an incoming input, one line at a time.
Transform how?
Show me some magic!
// File name: ~/Documents/server.js
// Dependencies
var express = require('express');
var bodyParser = require('body-parser');
// Default to port 3000
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
app.get('/', function (req, res) {
console.debug('Making a request to root url');
// Body parser makes sure to send response as JSON
res.send({message: 'Hello, express!'});
});
app.listen(PORT, function () {
// To be shown after cranking server up
console.info('Server listening on port', PORT);
});
Given this text file:
# '-r' is for using extended regular expressions
# Here we tell sed to match lines starting with comment
# characters and delete them
~/Documents $ sed -r '/^\s*\/\// d' server.js
var express = require('express');
var bodyParser = require('body-parser');
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
app.get('/', function (req, res) {
console.debug('Making a request to root url');
res.send({message: 'Hello, express!'});
});
app.listen(PORT, function () {
console.info('Server listening on port', PORT);
});
Delete all commented out lines
# '-e' lets you chain multiple expressions
~/Documents $ sed -r -e '/^\s*\/\// d' -e '/console\.(debug|log)/ d' server.js
var express = require('express');
var bodyParser = require('body-parser');
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
app.get('/', function (req, res) {
res.send({message: 'Hello, express!'});
});
app.listen(PORT, function () {
console.info('Server listening on port', PORT);
});
Don't want debug messages either
# 's/pattern/substitution/' replaces 'pattern' with 'substitution' in each line
~/Documents $ sed -r -e 's/var (.*)(,|=) (.*)/let \1\2 \3/' server.js
// File name: ~/Documents/server.js
// Dependencies
let express = require('express');
let bodyParser = require('body-parser');
// Default to port 3000
let PORT = process.env.PORT || 3000;
let app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
# more text...
Replace var
with let
~/Documents $ sed -r -e 's/PORT/IN/' server.js
// File name: ~/Documents/server.js
// Dependencies
var express = require('express');
var bodyParser = require('body-parser');
// Default to port 3000
var IN = process.env.PORT || 3000;
var app = express();
if (!process.env.IN) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
app.get('/', function (req, res) {
console.debug('Making a request to root url');
// Body parser makes sure to send response as JSON
res.send({message: 'Hello, express!'});
});
app.listen(IN, function () {
// To be shown after cranking server up
console.info('Server listening on port', IN);
});
Replace PORT
with IN
?
's/pattern/substitution/'
replaces ONLY the FIRST occurrence in the line... let's fix that!
# Notice the 'g' after the 's/pattern/substitution/' command
~/Documents $ sed -r -e 's/PORT/IN/ g' server.js
// File name: ~/Documents/server.js
// Dependencies
var express = require('express');
var bodyParser = require('body-parser');
// Default to port 3000
var IN = process.env.IN || 3000;
var app = express();
if (!process.env.IN) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
# more text...
Replace PORT
with IN
# This time, we're telling sed which range of lines we
# want to operate over
~/Documents $ sed '1,5 c console.log("Hello, world!")' server.js
console.log("Hello, world!")
// Default to port 3000
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
console.log('Finished injecting middleware');
# more text...
Change first 5 lines
# Mind the '\ ', so that 'date' doesn't fail
~/Documents $ sed "1 i // Modified on $(date +%Y-%m-%d\ %H:%M)" server.js
// Modified on 2016-09-25 21:14
// File name: ~/Documents/server.js
// Dependencies
var express = require('express');
var bodyParser = require('body-parser');
// Default to port 3000
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
# more text...
Insert text before the first line
~/Documents $ sed -r '/console\./ a \\' server.js
// File name: ~/Documents/server.js
// Dependencies
var express = require('express');
var bodyParser = require('body-parser');
// Default to port 3000
var PORT = process.env.PORT || 3000;
var app = express();
if (!process.env.PORT) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
// Express middleware
console.debug('Injecting middleware');
app.use(bodyParser.json());
# more text...
Insert a blank line after any line matching 'console.'
# '-i' to edit the file in place... that is, save changes to file
~/Documents $ sed -i -r -e '/^\s*\/\// d' -e '/console\.(debug|log)/ d' -e 's/PORT/IN/ g' server.js
# No output after hitting enter
~/Documents $ cat server.js
var express = require('express');
var bodyParser = require('body-parser');
var IN = process.env.IN || 3000;
var app = express();
if (!process.env.IN) {
console.warn('Defaulting to port 3000');
console.warn("It's better if you pass the server port in an ENV variable");
}
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: false});
app.get('/', function (req, res) {
res.send({message: 'Hello, express!'});
});
app.listen(IN, function () {
console.info('Server listening on port', IN);
});
Save all edits
Stands for...
Well, nothing too fancy, actually.
It was named after the initials of its creators: Alfred Aho, Peter Weinberger, Brian Kernighan
What is AWK useful for?
Text processing...
(bad) joking aside, AWK lets you process row/column formatted input (like the one you see in the /etc/passwd
file)...
where each row is supposed to be a record, and each column a field
Think of it as a mix of cut
+ tr
+ loads of steroids
Cool... and the examples?
# '-F' tells AWK how fields are separated within each record
~/Documents $ awk -F':' '{ print $1 $3 }' /etc/passwd
root0
bin1
daemon2
adm3
lp4
sync5
shutdown6
halt7
mail8
operator11
games12
ftp14
nobody99
avahi-autoipd170
systemd-bus-proxy999
systemd-network998
dbus81
# more output...
Print the username and UID from /etc/passwd
Username
UID
# OK, that didn't look quite good... fix it a bit
# Notice how the space is indicated
~/Documents $ awk -F':' '{ print $1 " " $3 }' /etc/passwd
root 0
bin 1
daemon 2
adm 3
lp 4
sync 5
shutdown 6
halt 7
mail 8
operator 11
games 12
ftp 14
nobody 99
avahi-autoipd 170
systemd-bus-proxy 999
systemd-network 998
dbus 81
# more output...
Placing values next to each other is how you tell AWK to concatenate strings; a literal " " adds the space between them!
~/Documents $ awk -F':' '$3 > 500 { print $1 " " $3 }' /etc/passwd
systemd-bus-proxy 999
systemd-network 998
polkitd 997
santiaro90 1000
geoclue 996
unbound 995
openvpn 994
lightdm 993
setroubleshoot 992
nm-openvpn 991
nm-openconnect 990
vboxadd 989
test 1001
OK, do the same, but only when UID > 500
~/Documents $ awk -F':' '$3 > 500 { if(length($1) > 8) { print $1 " " $3 } }' /etc/passwd
systemd-bus-proxy 999
systemd-network 998
santiaro90 1000
setroubleshoot 992
nm-openvpn 991
nm-openconnect 990
Also, exclude names that are 8 characters or shorter
Nice, but that command's starting to become nasty... isn't it?
# Write this in a file... let's say ~/Documents/awk_tuto
$3 > 500 {
if (length($1) > 8) {
print $1 " " $3
}
}
You can write an AWK script and read commands from there, too!
# Magic!!!...
~/Documents $ awk -F':' -f awk_tuto /etc/passwd
systemd-bus-proxy 999
systemd-network 998
santiaro90 1000
setroubleshoot 992
nm-openvpn 991
nm-openconnect 990
...then let AWK know your commands are to be read from that file:
BEGIN {
print "We are adding a header"
FS = ":"
}
$3 > 500 {
if (length($1) > 8) {
print $1 " " $3
}
}
Let's change our file a little bit...
# Hey, we're not even telling what separator's being used... :)
~/Documents $ awk -f awk_tuto /etc/passwd
We are adding a header
systemd-bus-proxy 999
systemd-network 998
santiaro90 1000
setroubleshoot 992
nm-openvpn 991
nm-openconnect 990
...and then look what happens
The BEGIN
block is executed before any line gets processed.
FS
stands for Field Separator
, and tells AWK... well, you know...
AWK provides a couple more variables like FS
... Let's see some of them in action.
BEGIN {
print "We are adding a header"
FS = ":"
OFS = "->" # output field separator
}
# Skip first 15 records
# Process record only if UID > 500
$3 > 500 && NR > 15 { # NR is current record index (starting from 1)
print $1, $3 # use comma so OFS works nicely
}
# Yup, there's an END block as well... ;)
END {
print "This is kinda a footer"
}
Edit the script one more time...
# Hey, we're not even telling what separator's being used... :)
~/Documents $ awk -f awk_tuto /etc/passwd
We are adding a header
systemd-network->998
polkitd->997
santiaro90->1000
geoclue->996
unbound->995
openvpn->994
lightdm->993
setroubleshoot->992
nm-openvpn->991
nm-openconnect->990
vboxadd->989
test->1001
This is kinda a footer
...and check out!