// Load pipeline: merge the input RDDs, sample them, split each line on ';',
// drop unwanted rows, parse the rest and register the result as SQL table "plis".

// Merge every input RDD (`RDDs` is defined elsewhere) into a single RDD.
val RDD_united = sc.union(RDDs)

// Sampling parameters. Without replacement, `fraction` is the probability that
// each element is kept, so a fraction of 1 effectively keeps the whole dataset;
// lower it to work on a subset. The fixed seed keeps the sample reproducible.
val withRemplacement = false
val fraction = 1
val seed = 42
val RDD_analyzed = RDD_united.sample(withReplacement = withRemplacement, fraction = fraction, seed = seed)

// Split each line into its ';'-separated fields and drop the rows whose first
// field contains "REEXPEDITION".
// String.split discards trailing empty strings, so an empty line or a line made
// only of ';' yields an empty array: indexing field 0 directly would throw
// ArrayIndexOutOfBoundsException and fail the whole job. Such lines carry no
// data and are dropped here instead.
val RDD = RDD_analyzed.map(_.split(";")).filter { fields =>
  fields.headOption.exists(first => !first.contains("REEXPEDITION"))
}

// Convert every field array with `parse_list` (defined elsewhere), then build
// the table object with `create_table` (defined elsewhere).
val RDD_listed = RDD.map(parse_list(_))
val RDD_table = create_table(RDD_listed)

// Expose the table to SQL under the name "plis".
RDD_table.registerTempTable("plis")
// Row count per id over table "plis", restricted to rows that have a
// MACAF_ID and whose format is 'PF'. Only the count is selected: the id
// itself is not part of the result.
val petitsformat1 = {
  val countPerIdQuery =
    "SELECT count(*) FROM plis where MACAF_ID IS NOT NULL AND format='PF' group by id"
  sqlContext.sql(countPerIdQuery)
}
// Per-id aggregate over table "plis", restricted to rows that have a MACAF_ID
// and whose format is 'PF': the id, the number of rows, and the average of
// each of the 20 listed columns (22 output columns in total).
// The SQL text is a raw triple-quoted literal without stripMargin, so its
// continuation lines deliberately stay at column 0 to keep the query string
// unchanged.
val petitsformat2 = {
  val averagesPerIdQuery = """SELECT id,
count(*),
AVG(date),
AVG(urgent),
AVG(cree),
AVG(ID_machine),
AVG(programme_arrivee),
AVG(programme_national),
AVG(OCR),
AVG(MACAF_TARIF),
AVG(service_os),
AVG(service_PND),
AVG(service_cedex),
AVG(service_lettreverte),
AVG(CI),
AVG(receptacle),
AVG(definitif),
AVG(Lot_sortie),
AVG(LEL),
AVG(MTEL),
AVG(TYPE_RAO),
AVG(DELTA_JOUR)
FROM
plis where MACAF_ID IS NOT NULL AND format='PF' group by id"""
  sqlContext.sql(averagesPerIdQuery)
}
// Sample output rows (22 columns each, matching the petitsformat2 select list):
// [EA64A27C,1,3.0,0.0,1.0,7435.0,1.0,1.0,1.0,80.0,null,null,null,null,0.0,1.0,0.0,1.0,0.0,0.0,1.0,-2.0]
// [E32634C7,2,4.0,1.0,1.0,5995.5,1.0,1.0,1.0,104.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,-1.0]
// [DDBDA75D,1,5.0,1.0,1.0,6644.0,1.0,1.0,1.0,64.0,null,null,null,null,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0]
// DATA VISUALISATION