summaryrefslogtreecommitdiff
path: root/tests/examplefiles/test.pig
diff options
context:
space:
mode:
Diffstat (limited to 'tests/examplefiles/test.pig')
-rw-r--r--tests/examplefiles/test.pig148
1 files changed, 0 insertions, 148 deletions
diff --git a/tests/examplefiles/test.pig b/tests/examplefiles/test.pig
deleted file mode 100644
index f67b0268..00000000
--- a/tests/examplefiles/test.pig
+++ /dev/null
@@ -1,148 +0,0 @@
-/**
- * This script is an example recommender (using made up data) showing how you might modify item-item links
- * by defining similar relations between items in a dataset and customizing the change in weighting.
- * This example creates metadata by using the genre field as the metadata_field. The items with
- * the same genre have it's weight cut in half in order to boost the signals of movies that do not have the same genre.
- * This technique requires a customization of the standard GetItemItemRecommendations macro
- */
-import 'recommenders.pig';
-
-
-
-%default INPUT_PATH_PURCHASES '../data/retail/purchases.json'
-%default INPUT_PATH_WISHLIST '../data/retail/wishlists.json'
-%default INPUT_PATH_INVENTORY '../data/retail/inventory.json'
-%default OUTPUT_PATH '../data/retail/out/modify_item_item'
-
-
-/******** Custom GetItemItemRecommnedations *********/
-define recsys__GetItemItemRecommendations_ModifyCustom(user_item_signals, metadata) returns item_item_recs {
-
- -- Convert user_item_signals to an item_item_graph
- ii_links_raw, item_weights = recsys__BuildItemItemGraph(
- $user_item_signals,
- $LOGISTIC_PARAM,
- $MIN_LINK_WEIGHT,
- $MAX_LINKS_PER_USER
- );
- -- NOTE this function is added in order to combine metadata with item-item links
- -- See macro for more detailed explination
- ii_links_metadata = recsys__AddMetadataToItemItemLinks(
- ii_links_raw,
- $metadata
- );
-
- /********* Custom Code starts here ********/
-
- --The code here should adjust the weights based on an item-item link and the equality of metadata.
- -- In this case, if the metadata is the same, the weight is reduced. Otherwise the weight is left alone.
- ii_links_adjusted = foreach ii_links_metadata generate item_A, item_B,
- -- the amount of weight adjusted is dependant on the domain of data and what is expected
- -- It is always best to adjust the weight by multiplying it by a factor rather than addition with a constant
- (metadata_B == metadata_A ? (weight * 0.5): weight) as weight;
-
-
- /******** Custom Code stops here *********/
-
- -- remove negative numbers just incase
- ii_links_adjusted_filt = foreach ii_links_adjusted generate item_A, item_B,
- (weight <= 0 ? 0: weight) as weight;
- -- Adjust the weights of the graph to improve recommendations.
- ii_links = recsys__AdjustItemItemGraphWeight(
- ii_links_adjusted_filt,
- item_weights,
- $BAYESIAN_PRIOR
- );
-
- -- Use the item-item graph to create item-item recommendations.
- $item_item_recs = recsys__BuildItemItemRecommendationsFromGraph(
- ii_links,
- $NUM_RECS_PER_ITEM,
- $NUM_RECS_PER_ITEM
- );
-};
-
-
-/******* Load Data **********/
-
---Get purchase signals
-purchase_input = load '$INPUT_PATH_PURCHASES' using org.apache.pig.piggybank.storage.JsonLoader(
- 'row_id: int,
- movie_id: chararray,
- movie_name: chararray,
- user_id: chararray,
- purchase_price: int');
-
---Get wishlist signals
-wishlist_input = load '$INPUT_PATH_WISHLIST' using org.apache.pig.piggybank.storage.JsonLoader(
- 'row_id: int,
- movie_id: chararray,
- movie_name: chararray,
- user_id: chararray');
-
-
-/******* Convert Data to Signals **********/
-
--- Start with choosing 1 as max weight for a signal.
-purchase_signals = foreach purchase_input generate
- user_id as user,
- movie_name as item,
- 1.0 as weight;
-
-
--- Start with choosing 0.5 as weight for wishlist items because that is a weaker signal than
--- purchasing an item.
-wishlist_signals = foreach wishlist_input generate
- user_id as user,
- movie_name as item,
- 0.5 as weight;
-
-user_signals = union purchase_signals, wishlist_signals;
-
-
-/******** Changes for Modifying item-item links ******/
-inventory_input = load '$INPUT_PATH_INVENTORY' using org.apache.pig.piggybank.storage.JsonLoader(
- 'movie_title: chararray,
- genres: bag{tuple(content:chararray)}');
-
-
-metadata = foreach inventory_input generate
- FLATTEN(genres) as metadata_field,
- movie_title as item;
--- requires the macro to be written seperately
- --NOTE this macro is defined within this file for clarity
-item_item_recs = recsys__GetItemItemRecommendations_ModifyCustom(user_signals, metadata);
-/******* No more changes ********/
-
-
-user_item_recs = recsys__GetUserItemRecommendations(user_signals, item_item_recs);
-
---Completely unrelated code stuck in the middle
-data = LOAD 's3n://my-s3-bucket/path/to/responses'
- USING org.apache.pig.piggybank.storage.JsonLoader();
-responses = FOREACH data GENERATE object#'response' AS response: map[];
-out = FOREACH responses
- GENERATE response#'id' AS id: int, response#'thread' AS thread: chararray,
- response#'comments' AS comments: {t: (comment: chararray)};
-STORE out INTO 's3n://path/to/output' USING PigStorage('|');
-
-
-/******* Store recommendations **********/
-
--- If your output folder exists already, hadoop will refuse to write data to it.
-
-rmf $OUTPUT_PATH/item_item_recs;
-rmf $OUTPUT_PATH/user_item_recs;
-
-store item_item_recs into '$OUTPUT_PATH/item_item_recs' using PigStorage();
-store user_item_recs into '$OUTPUT_PATH/user_item_recs' using PigStorage();
-
--- STORE the item_item_recs into dynamo
-STORE item_item_recs
- INTO '$OUTPUT_PATH/unused-ii-table-data'
-USING com.mortardata.pig.storage.DynamoDBStorage('$II_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');
-
--- STORE the user_item_recs into dynamo
-STORE user_item_recs
- INTO '$OUTPUT_PATH/unused-ui-table-data'
-USING com.mortardata.pig.storage.DynamoDBStorage('$UI_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');