Introduction

Archive for Dummies

WARNING

Arch Linux

  • only supports up-to-date, official packages

  • strongly discourages users from mixing up-to-date packages with outdated ones

Use with pacman

# /etc/pacman.conf
# See the pacman.conf(5) manpage for option and repository directives

# GENERAL OPTIONS
[options]
#RootDir     = /
#DBPath      = /var/lib/pacman/
#CacheDir    = /var/cache/pacman/pkg/

...

[core]
Server = http://ala.seblu.net/repos/2014/03/30/$repo/os/$arch

[extra]
Server = http://ala.seblu.net/repos/2014/03/30/$repo/os/$arch

...

Use with a web browser

Background

  • Inspired by the Arch Linux Rollback Machine (ARM) project
  • The original ARM was stopped in August 2013
  • A from scratch replacement started on seblu.net
  • Proposed for being an official service in March 2014
  • Renamed into Arch Linux Archive (following the proposal)
  • Moved to a new DNS ala.seblu.net
  • Second proposal for being official with DNS archive.al.org and inclusion of agetpkg in October 2015

agetpkg

Download old versions in a snap

In a nutshell

  • A CLI tool to list/get/install packages from the Archive
  • FOSS, coded in Python 3
  • Use a local index of all the packages in the Archive
  • Downloads .sig files for package signature verification
  • Should land in official repositories

Get a package

Get all packages

List packages

Install packages

Packages Index

  • Cache is checked every time (unless -U)
  • Update is forced with -u
  • Freshness is checked with HTTP IMS (If-Modified-Since)
  • About 3 years of index is 217 Kibibytes

RTFU

archivetools

Behind the scenes

In a nutshell

  • Less than 150 lines of code in Bash
    • A PKGBUILD is provided
    • Use systemd timers and services
  • No smart code on the HTTP server
    • Very dumb "API"; based on file path and format
  • Use rsync.archlinux.org (or its mirrors) to snapshot
    • Only download the difference between 2 days
    • Massive use of hardlinks for improved storage
  • Extra features
    • ISO images & bootstrap tarballs archiving
    • Packages hierarchy  
    • Timed repositories: last, week, month

How to install

git clone https://github.com/seblu/archivetools.git
cd archivetools
makepkg -i
systemctl enable archive.timer 
  1. Install deps: bash, rsync, hardlink, xz, util-linux
  2. Install the archive.sh in /usr/bin/archive
  3. Copy the default config file in /etc/archive.conf
  4. Create an archive user/group
  5. Create a /srv/archive directory
  6. Setup systemd timers and services
  7. Run /usr/bin/archive

Easy way

Manual way

archive.conf

# Archlinux remote rsync server
ARCHIVE_RSYNC='rsync://polymorf.fr/archlinux/'

# Archive local directory
ARCHIVE_DIR='/srv/archive'

# Archive User and Group
ARCHIVE_USER=archive
ARCHIVE_GROUP=archive

# Package extensions
PKGEXT='.pkg.tar.xz'
PKGSIG="$PKGEXT.sig"

# Umask used when archiving
UMASK=022

#### Repositories

# Enable repositories archiving
ARCHIVE_REPO=1

# Enable daily repo trees
# This offers daily, weekly, monthly, and yearly snapshot trees
REPO_DAYLY=1

# Enable packages tree
# Create a dedicated tree with packages sorted by name
REPO_PACKAGES=1

# Enable repository package index
# Needed by agetpkg to list packages
REPO_PACKAGES_INDEX=1

# Search in all repository snapshots. This is slow.
# It is better to leave this at 0 so that only the last snapshot is searched
REPO_PACKAGES_FULL_SEARCH=0

#### ISO

# Enable ISO archiving
ARCHIVE_ISO=1

systemd units

[Unit]
Description=Archive Update

[Service]
Type=oneshot
EnvironmentFile=/etc/archive.conf
User=archive
Group=archive
ExecStart=/usr/bin/archive
[Unit]
Description=Daily Archive Update

[Timer]
OnCalendar=12:42:00
AccuracySec=1m
Persistent=true

[Install]
WantedBy=timers.target
Also=archive-hardlink.timer
[Unit]
Description=Archive Hardlinking
After=archive.service

[Service]
Type=oneshot
EnvironmentFile=/etc/archive.conf
User=archive
Group=archive
Nice=19
IOSchedulingClass=idle
ExecStart=/usr/bin/hardlink -Ocv ${ARCHIVE_DIR}
[Unit]
Description=Monthly Hardlinking of Archive

[Timer]
OnCalendar=*-*-03 03:00:00
AccuracySec=1m
Persistent=true

[Install]
WantedBy=timers.target

archive.service

archive-hardlink.service

archive.timer

archive-hardlink.timer

nginx server config

    # archlinux archive
    # ala.seblu.net
    server {
        listen [::]:80;
        listen 80;
        server_name ala.seblu.net;
        access_log /var/log/nginx/ala.seblu.net.access.log;
        error_log /var/log/nginx/ala.seblu.net.error.log;

        location / {
            root /srv/http/archive;
            autoindex on;
            autoindex_exact_size off;
        }
    }

The archive tree

The iso tree

The repos tree

The packages tree

The packages/.all directory

The snapshotting code

# Snapshot the upstream repositories into today's snapshot directory.
#
# Globals read:
#   REPO_DIR       root of the dated snapshot trees (<year>/<x>/<y> layout)
#   SNAPR, SNAP    identifier and destination path of today's snapshot;
#                  expected to be set by the elided code below (not shown here)
#   ARCHIVE_RSYNC  rsync source URL
# Locals left set for the rest of the function:
#   LAST           newest existing snapshot that is not today's, or empty
#   LINKDEST       array holding the --link-dest option, or empty
# Calls error() if rsync fails.
repo_rsync() {
	msg "Snapshoting repositories"

	# ...

	# compute last but today
	# Pathname expansion already yields sorted results, so the last entry that
	# does not contain $SNAPR is the newest previous snapshot. Using a glob
	# instead of parsing `ls` keeps paths with whitespace intact and stays
	# silent when no snapshot exists yet.
	local snapshot LAST=''
	for snapshot in "$REPO_DIR"/2???/*/*; do
		# An unmatched glob is left as the literal pattern; skip it.
		[[ -e "$snapshot" ]] || continue
		# Never link against the snapshot currently being written.
		[[ "$snapshot" == *"$SNAPR"* ]] && continue
		LAST=$snapshot
	done

	# Keep the optional flag in an array: it expands to zero words on the very
	# first snapshot and to exactly one word otherwise, whatever the path holds.
	local -a LINKDEST=()
	if [[ -n "$LAST" ]]; then
		LINKDEST=("--link-dest=$LAST/")
	fi

	# The ${arr[@]+...} guard keeps an empty array from tripping `set -u` on
	# older bash releases.
	rsync -rltH ${LINKDEST[@]+"${LINKDEST[@]}"} --exclude '*/.*' --exclude 'iso/*' "$ARCHIVE_RSYNC" "$SNAP/" ||
		error "Unable to rsync: $ARCHIVE_RSYNC."

	# ...
}

the v0 index format

# Build the v0 package index: one line per package file found under $1,
# with the $PKGEXT suffix removed, sorted, and xz-compressed into $TMPINDEX.
#
# Arguments: $1 - directory tree to scan for "*$PKGEXT" files
# Globals read: PKGEXT, TMPINDEX
repo_packages_index() {
    msg 'Updating package index'

    # ...

    # sed program that drops as many trailing characters as $PKGEXT is long
    local strip_suffix="s/.\{${#PKGEXT}\}\$//"

    find "$1" -name "*$PKGEXT" -printf '%f\n' \
        | sed "$strip_suffix" \
        | sort \
        | xz -9 > "$TMPINDEX"

    # ...
}
Made with Slides.com