Tutorial #2
I cannot get this code to execute without errors, even after trying several variations.
-- For each year, find the player(s) who scored the most runs.
-- String literals must use plain ASCII single quotes ('); typographic quotes
-- pasted from a word processor or web page make the Pig parser fail.
batting = LOAD 'Batting.csv' USING PigStorage(',');

-- Keep only rows whose second column is a positive number. A non-numeric
-- value (such as a header line, if the file has one) casts to null and is dropped.
raw_runs = FILTER batting BY (int)$1 > 0;

-- Fields loaded without a schema are bytearrays; cast them explicitly so that
-- MAX resolves to a single implementation and the join keys below have matching types.
runs = FOREACH raw_runs GENERATE (chararray)$0 AS playerID, (int)$1 AS year, (int)$8 AS runs;

-- One group per year, then the highest run total inside each group.
grp_data = GROUP runs BY (year);
max_runs = FOREACH grp_data GENERATE group AS grp, MAX(runs.runs) AS max_runs;

-- Join back on (year, runs) to recover which player(s) achieved each yearly maximum.
join_max_runs = JOIN max_runs BY ($0, max_runs), runs BY (year, runs);

-- Joined columns: $0 = year (grp), $1 = max_runs, $2 = playerID.
join_data = FOREACH join_max_runs GENERATE $0 AS year, $2 AS playerID, $1 AS runs;

-- Pig evaluates lazily: add `DUMP join_data;` (or a STORE statement) to see the result.
You must be logged in to reply to this topic.