6 files changed, 114 insertions, 6 deletions
diff --git a/test/simple_encode_test.cc b/test/simple_encode_test.cc
index 684835168..9a938a8d1 100644
--- a/test/simple_encode_test.cc
+++ b/test/simple_encode_test.cc
@@ -60,6 +60,27 @@ TEST_F(SimpleEncodeTest, ComputeFirstPassStats) {
   }
 }
 
+TEST_F(SimpleEncodeTest, ObserveFirstPassMotionVectors) {
+  SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,
+                             target_bitrate_, num_frames_,
+                             in_file_path_str_.c_str());
+  simple_encode.ComputeFirstPassStats();
+  std::vector<std::vector<MotionVectorInfo>> fps_motion_vectors =
+      simple_encode.ObserveFirstPassMotionVectors();
+  EXPECT_EQ(fps_motion_vectors.size(), static_cast<size_t>(num_frames_));
+  const size_t num_blocks = ((width_ + 15) >> 4) * ((height_ + 15) >> 4);
+  // ASSERT (fatal) on each frame's size: the inner loop indexes up to it.
+  for (size_t i = 0; i < fps_motion_vectors.size(); ++i) {
+    ASSERT_EQ(num_blocks, fps_motion_vectors[i].size());
+    for (size_t j = 0; j < num_blocks; ++j) {
+      const int mv_count = fps_motion_vectors[i][j].mv_count;
+      const int ref_count = (fps_motion_vectors[i][j].ref_frame[0] > 0) +
+                            (fps_motion_vectors[i][j].ref_frame[1] > 0);
+      EXPECT_EQ(mv_count, ref_count);
+    }
+  }
+}
+
 TEST_F(SimpleEncodeTest, GetCodingFrameNum) {
   SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,
                              target_bitrate_, num_frames_,
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 68669ebf6..5471f9902 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1024,6 +1024,7 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
 #if CONFIG_RATE_CTRL
   free_partition_info(cpi);
   free_motion_vector_info(cpi);
+  free_fp_motion_vector_info(cpi);
 #endif
 
   vp9_free_ref_frame_buffers(cm->buffer_pool);
@@ -2661,6 +2662,7 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
   encode_command_init(&cpi->encode_command);
   partition_info_init(cpi);
   motion_vector_info_init(cpi);
+  fp_motion_vector_info_init(cpi);
 #endif
 
   return cpi;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 31fbce24f..85071459f 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -642,6 +642,9 @@ static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) {
 // Returns number of units in size of 4, if not multiple not a multiple of 4,
 // round it up. For example, size is 7, return 2.
 static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; }
+// Returns the number of units of size 16 needed to cover |size|, rounding up
+// when |size| is not a multiple of 16. For example, size is 17, return 2.
+static INLINE int get_num_unit_16x16(int size) { return (size + 15) >> 4; }
 #endif  // CONFIG_RATE_CTRL
 
 typedef struct VP9_COMP {
@@ -952,6 +955,7 @@ typedef struct VP9_COMP {
   ENCODE_COMMAND encode_command;
   PARTITION_INFO *partition_info;
   MOTION_VECTOR_INFO *motion_vector_info;
+  MOTION_VECTOR_INFO *fp_motion_vector_info;
 
   RATE_QSTEP_MODEL rq_model[ENCODE_FRAME_TYPES];
 #endif
@@ -1000,6 +1004,27 @@ static INLINE void free_motion_vector_info(struct VP9_COMP *cpi) {
   cpi->motion_vector_info = NULL;
 }
 
+// Allocates zero-initialized memory for the first pass motion vector
+// information, one MOTION_VECTOR_INFO entry per 16x16 block of the frame.
+// Only called once in vp9_create_compressor().
+static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width);
+  const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height);
+  // vpx_calloc() already returns zeroed memory, so no separate memset() pass
+  // over the buffer is required.
+  CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,
+                  (MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
+                                                   sizeof(MOTION_VECTOR_INFO)));
+}
+
+// Frees the first pass motion vector information and resets the pointer to
+// NULL. Only called once in dealloc_compressor_data().
+static INLINE void free_fp_motion_vector_info(struct VP9_COMP *cpi) {
+  vpx_free(cpi->fp_motion_vector_info);
+  cpi->fp_motion_vector_info = NULL;
+}
+
 // This is the c-version counter part of ImageBuffer
 typedef struct IMAGE_BUFFER {
   int allocated;
@@ -1021,6 +1046,7 @@ typedef struct ENCODE_FRAME_RESULT {
   FRAME_COUNTS frame_counts;
   const PARTITION_INFO *partition_info;
   const MOTION_VECTOR_INFO *motion_vector_info;
+  const MOTION_VECTOR_INFO *fp_motion_vector_info;
   IMAGE_BUFFER coded_frame;
 #endif  // CONFIG_RATE_CTRL
   int quantize_index;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 45b003e1e..009bab7c5 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -839,6 +839,26 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile,
                    fp_acc_data->image_data_start_row);
 }
 
+#if CONFIG_RATE_CTRL
+// Records |mv| for the 16x16 block at (mb_row, mb_col). Slot 0 is tagged with
+// LAST_FRAME; slot 1 (selected when is_second_mv != 0) with GOLDEN_FRAME.
+static void store_fp_motion_vector(VP9_COMP *cpi, const MV *mv,
+                                   const int mb_row, const int mb_col,
+                                   const int is_second_mv) {
+  // Blocks are laid out in raster order with mb_cols entries per row.
+  MOTION_VECTOR_INFO *const block_info =
+      &cpi->fp_motion_vector_info[mb_row * cpi->common.mb_cols + mb_col];
+  const int slot = is_second_mv ? 1 : 0;
+  if (slot == 0) {
+    block_info->ref_frame[slot] = LAST_FRAME;
+  } else {
+    block_info->ref_frame[slot] = GOLDEN_FRAME;
+  }
+  block_info->mv[slot].as_mv.row = mv->row;
+  block_info->mv[slot].as_mv.col = mv->col;
+}
+#endif  // CONFIG_RATE_CTRL
+
 #define NZ_MOTION_PENALTY 128
 #define INTRA_MODE_PENALTY 1024
 void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
@@ -1137,6 +1157,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
                 vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);
           }
         }
+#if CONFIG_RATE_CTRL
+        store_fp_motion_vector(cpi, &mv, mb_row, mb_col, /*is_second_mv=*/0);
+#endif  // CONFIG_RATE_CTRL
 
         // Search in an older reference frame.
         if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
@@ -1158,6 +1181,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
           first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);
+#if CONFIG_RATE_CTRL
+          store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col,
+                                 /*is_second_mv=*/1);
+#endif  // CONFIG_RATE_CTRL
 
           if (gf_motion_error < motion_error && gf_motion_error < this_error)
             ++(fp_acc_data->second_ref_count);
diff --git a/vp9/simple_encode.cc b/vp9/simple_encode.cc
index d083a44c2..678cf8add 100644
--- a/vp9/simple_encode.cc
+++ b/vp9/simple_encode.cc
@@ -471,8 +471,8 @@ static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
   encode_frame_result->coding_data.reset(
       new (std::nothrow) uint8_t[max_coding_data_byte_size]);
 
-  encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_width);
-  encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_height);
+  encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
+  encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
   encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
                                              encode_frame_result->num_cols_4x4);
   encode_frame_result->motion_vector_info.resize(
@@ -742,6 +742,8 @@ void SimpleEncode::ComputeFirstPassStats() {
   struct lookahead_ctx *lookahead = cpi->lookahead;
   int i;
   int use_highbitdepth = 0;
+  const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
+  const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
 #if CONFIG_VP9_HIGHBITDEPTH
   use_highbitdepth = cpi->common.use_highbitdepth;
 #endif
@@ -774,6 +776,12 @@ void SimpleEncode::ComputeFirstPassStats() {
         // vp9_get_compressed_data only generates first pass stats not
         // compresses data
         assert(size == 0);
+        // Get vp9 first pass motion vector info. Read the buffer owned by cpi
+        // directly; the first pass does not populate encode_frame_info for it.
+        std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
+        update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16,
+                                  num_cols_16x16, mv_info.data());
+        fp_motion_vector_info_.push_back(mv_info);
       }
       impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
     }
@@ -811,6 +819,11 @@ std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
   return output_stats;
 }
 
+std::vector<std::vector<MotionVectorInfo>>
+SimpleEncode::ObserveFirstPassMotionVectors() {
+  return fp_motion_vector_info_;  // Returned by value: the caller gets a copy.
+}
+
 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
                                                  int gop_map_size) {
   for (int i = 0; i < gop_map_size; ++i) {
diff --git a/vp9/simple_encode.h b/vp9/simple_encode.h
index ae36eb2c5..6c66aafda 100644
--- a/vp9/simple_encode.h
+++ b/vp9/simple_encode.h
@@ -60,7 +60,9 @@ struct PartitionInfo {
 
 constexpr int kMotionVectorPrecision = 8;
 
-// The frame is split to 4x4 blocks.
+// In the first pass, the frame is split into 16x16 blocks.
+// This structure contains the information of each 16x16 block.
+// In the second pass, the frame is split into 4x4 blocks.
 // This structure contains the information of each 4x4 block.
 struct MotionVectorInfo {
   // Number of valid motion vectors, always 0 if this block is in the key frame.
@@ -68,8 +70,8 @@ struct MotionVectorInfo {
   int mv_count;
   // The reference frame for motion vectors. If the second motion vector does
   // not exist (mv_count = 1), the reference frame is kNoneRefFrame.
-  // Otherwise, the reference frame is either kLastFrame, or kGoldenFrame,
-  // or kAltRefFrame.
+  // Otherwise, the reference frame is either kRefFrameTypeLast, or
+  // kRefFrameTypePast, or kRefFrameTypeFuture.
   RefFrameType ref_frame[2];
   // The row offset of motion vectors in the unit of pixel.
   // If the second motion vector does not exist, the value is 0.
@@ -245,7 +247,7 @@ struct EncodeFrameResult {
   std::vector<PartitionInfo> partition_info;
   // A vector of the motion vector information of the frame.
   // The number of elements is |num_rows_4x4| * |num_cols_4x4|.
-  // The frame is divided 4x4 blocks of |num_rows_4x4| rows and
+  // The frame is divided into 4x4 blocks of |num_rows_4x4| rows and
   // |num_cols_4x4| columns.
   // Each 4x4 block contains 0 motion vector if this is an intra predicted
   // frame (for example, the key frame). If the frame is inter predicted,
@@ -324,6 +326,12 @@ class SimpleEncode {
   // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
   std::vector<std::vector<double>> ObserveFirstPassStats();
 
+  // Outputs the first pass motion vectors represented by a 2-D vector.
+  // One can use the frame index at first dimension to retrieve the mvs for
+  // each video frame. The frame is divided into 16x16 blocks. The number of
+  // elements is round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
+  std::vector<std::vector<MotionVectorInfo>> ObserveFirstPassMotionVectors();
+
   // Ouputs a copy of key_frame_map_, a binary vector with size equal to the
   // number of show frames in the video. For each entry in the vector, 1
   // indicates the position is a key frame and 0 indicates it's not a key frame.
@@ -451,6 +459,17 @@ class SimpleEncode {
   // frame appears?
   // Reference frames info of the to-be-coded frame.
   RefFrameInfo ref_frame_info_;
+
+  // A 2-D vector of motion vector information of the frame collected
+  // from the first pass. The first dimension is the frame index.
+  // Each frame is divided into 16x16 blocks. The number of elements is
+  // round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
+  // Each 16x16 block contains 0 motion vector if this is an intra predicted
+  // frame (for example, the key frame). If the frame is inter predicted,
+  // each 16x16 block contains either 1 or 2 motion vectors.
+  // The first motion vector is always from the LAST_FRAME.
+  // The second motion vector is always from the GOLDEN_FRAME.
+  std::vector<std::vector<MotionVectorInfo>> fp_motion_vector_info_;
 };
 
 }  // namespace vp9