Currently running on WMF analytics cluster (stat1002)
# logrotate runs at 6:25, plus time for the rsync (hourly?); running at 12:00 gives us roughly 6 hours of buffer
0 12 * * * cd ~/wikidata-data/src/api/getclaims_property_use && ./generate.sh && ./export.sh
#It doesn't matter when these run
0 6 * * * cd ~/wikidata-data/src/social/facebook && php ./generate.php && ./export.sh
1 6 * * * cd ~/wikidata-data/src/social/googleplus && php ./generate.php && ./export.sh
2 6 * * * cd ~/wikidata-data/src/social/identica && php ./generate.php && ./export.sh
3 6 * * * cd ~/wikidata-data/src/social/irc && php ./generate.php && ./export.sh
4 6 * * * cd ~/wikidata-data/src/social/mail && php ./generate.php && ./export.sh
5 6 * * * cd ~/wikidata-data/src/social/newsletter && php ./generate.php && ./export.sh
6 6 * * * cd ~/wikidata-data/src/social/techmail && php ./generate.php && ./export.sh
7 6 * * * cd ~/wikidata-data/src/social/twitter && php ./generate.php && ./export.sh
10 6 * * * cd ~/wikidata-data/src/site_stats/active_users && ./generate.sh && ./export.sh
11 6 * * * cd ~/wikidata-data/src/site_stats/good_articles && ./generate.sh && ./export.sh
12 6 * * * cd ~/wikidata-data/src/site_stats/total_edits && ./generate.sh && ./export.sh
13 6 * * * cd ~/wikidata-data/src/site_stats/total_pages && ./generate.sh && ./export.sh
14 6 * * * cd ~/wikidata-data/src/site_stats/total_views && ./generate.sh && ./export.sh
15 6 * * * cd ~/wikidata-data/src/site_stats/users && ./generate.sh && ./export.sh
16 6 * * * cd ~/wikidata-data/src/site_stats/admins && ./generate.sh && ./export.sh
17 6 * * * cd ~/wikidata-data/src/site_stats/bureaucrats && ./generate.sh && ./export.sh
Currently storing on WMF analytics db slave (analytics-store)
mysql:research@analytics-store.eqiad.wmnet [staging]> show tables;
+--------------------------------------+
| Tables_in_staging |
+--------------------------------------+
| .... |
| wikidata_getclaims_property_use |
| wikidata_site_stats |
| wikidata_site_stats_active_users |
| wikidata_site_stats_admins |
| wikidata_site_stats_bureaucrats |
| wikidata_site_stats_good_articles |
| wikidata_site_stats_total_edits |
| wikidata_site_stats_total_pages |
| wikidata_site_stats_total_views |
| wikidata_site_stats_users |
| wikidata_social |
| wikidata_social_facebook |
| wikidata_social_googleplus |
| wikidata_social_identica |
| wikidata_social_irc |
| wikidata_social_mail |
| wikidata_social_newsletter |
| wikidata_social_techmail |
| wikidata_social_twitter |
| .... |
+--------------------------------------+
176 rows in set (0.03 sec)
mysql:research@analytics-store.eqiad.wmnet [staging]>
SELECT *
FROM wikidata_social_twitter
LIMIT 6;
+------------+-----------+
| date | followers |
+------------+-----------+
| 2015-09-24 | 4799 |
| 2015-09-25 | 4804 |
| 2015-09-26 | 4806 |
| 2015-09-27 | 4806 |
| 2015-09-28 | 4811 |
| 2015-09-29 | 4815 |
+------------+-----------+
6 rows in set (0.00 sec)
Exporting TSVs to an endpoint
date followers
2012-03-06 0
2012-03-16 0
2012-04-01 156
2012-05-01 341
2012-06-01 433
2012-07-01 518
2012-07-19 594
2012-08-01 638
2012-09-03 703
2012-10-01 772
2012-11-01 863
2012-12-01 982
2013-01-01 1048
2013-01-15 1072
2013-02-01 1121
2013-02-14 1158
2013-03-01 1215
2013-03-28 1326
2013-04-01 1337
2013-04-25 1436
2013-05-04 1498
2013-05-29 1571
2013-06-27 1672
2013-08-15 1795
2013-09-03 1860
2013-10-01 1948
2013-11-02 2026
2013-12-01 2099
2014-01-01 2173
2014-01-29 2259
2014-03-09 2468
2014-04-09 2663
2014-05-01 2729
2014-06-23 2916
2014-09-25 3348
2014-12-23 3720
2015-01-09 3788
2015-03-03 4039
2015-04-01 4203
2015-05-04 4306
2015-06-11 4460
2015-07-12 4540
2015-08-14 4646
2015-09-14 4772
2015-09-30 4820
2015-10-01 4822
2015-10-02 4830
2015-10-03 4835
2015-10-04 4840
2015-10-05 4843
2015-10-06 4845
2015-10-07 4850
2015-10-08 4856
2015-10-09 4855
2015-10-10 4863
2015-10-11 4870
2015-10-12 4872
2015-10-13 4875
2015-10-14 4878
2015-10-15 4883
2015-10-16 4882
2015-10-17 4882
2015-10-18 4885
2015-10-19 4887
2015-10-20 4892
2015-10-21 4891
2015-10-22 4894
2015-09-24 4799
2015-09-25 4804
2015-09-26 4806
2015-09-27 4806
2015-09-28 4811
2015-09-29 4815
storage_schemas => {
# Retain aggregated data at a one-minute resolution for one week; at
# five-minute resolution for two weeks; at 15-minute resolution for
# one month; and at one-hour resolution for one year.
'default' => {
pattern => '.*',
retentions => '1m:7d,5m:14d,15m:30d,1h:1y',
},
# Retains some specific wikidata daily metrics for much longer
'wikidata-daily' => {
pattern => '^wikidata\.daily\..*',
retentions => '1d:5y,7d:100y',
},
},
Retention and resolution can be tweaked
Generation scripts will send data straight to statsd
$response = $this->curlGet( 'http://m.facebook.com/wikidata' );
preg_match( '/([\d,]+) people like this/i', $response, $matches );
$count = str_replace( ',', '', $matches[1] );
$client = new StatsdClient( new SocketSender( 'statsd.eqiad.wmnet', 8126, 'udp' ) );
$factory = new StatsdDataFactory( '\Liuggio\StatsdClient\Entity\StatsdData' );
$service = new StatsdService( $client, $factory );
$service->gauge('wikidata.daily.facebook.likes', $count);
We could add data directly from Wikidata & Wikibase extensions
$statsd = RequestContext::getMain()->getStats();
for ( $c = 0; $c < $maxPasses; ) {
if ( $t > $maxTime ) {
$this->trace( "Reached max time after $t seconds." );
$statsd->increment( 'wikibase.maintenance.dispatchchanges.maxtime' );
break;
}
$c++;
$this->trace( "Picking a client wiki..." );
$wikiState = $dispatcher->selectClient();
if ( $wikiState ) {
$statsd->increment( 'wikibase.maintenance.dispatchchanges.dispatch.all' );
$statsd->increment( "wikibase.maintenance.dispatchchanges.dispatch.$wikiState" );
$dispatcher->dispatchTo( $wikiState );
} else {
// No client found
$statsd->increment( 'wikibase.maintenance.dispatchchanges.noclient' );
}
$t = ( time() - $startTime );
}
Lots of tools can use data from Graphite!
Including Shiny!
More metrics!