summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShinya Maeda <shinya@gitlab.com>2019-07-18 16:22:46 +0700
committerShinya Maeda <shinya@gitlab.com>2019-07-23 17:26:08 +0700
commitc2e0e689f355555db231ac6db40ab1b654c90233 (patch)
tree1ab8b2e6561598a61a10a0197a975019ed13f464
parent1a3fda63a5f9756cde19bc7e221651b0c33cb5dc (diff)
downloadgitlab-ce-c2e0e689f355555db231ac6db40ab1b654c90233.tar.gz
Validate the existence of archived traces before removing live tracesafe-archiving-for-traces
Often live traces are removed even though the archived trace doesn't exist. This commit checkes the existence strictly.
-rw-r--r--app/models/ci/build.rb8
-rw-r--r--app/models/ci/job_artifact.rb4
-rw-r--r--app/services/ci/archive_trace_service.rb28
-rw-r--r--app/workers/archive_trace_worker.rb2
-rw-r--r--app/workers/ci/archive_traces_cron_worker.rb2
-rw-r--r--changelogs/unreleased/safe-archiving-for-traces.yml5
-rw-r--r--lib/gitlab/ci/trace.rb12
-rw-r--r--lib/gitlab/ci/trace/chunked_io.rb7
-rw-r--r--spec/lib/gitlab/ci/trace/chunked_io_spec.rb10
-rw-r--r--spec/models/ci/build_spec.rb28
-rw-r--r--spec/models/ci/job_artifact_spec.rb25
-rw-r--r--spec/services/ci/archive_trace_service_spec.rb38
-rw-r--r--spec/support/shared_examples/ci_trace_shared_examples.rb52
-rw-r--r--spec/workers/archive_trace_worker_spec.rb2
-rw-r--r--spec/workers/ci/archive_traces_cron_worker_spec.rb7
-rw-r--r--spec/workers/stuck_ci_jobs_worker_spec.rb4
16 files changed, 214 insertions, 20 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 635fcc86166..da70cb9a9a7 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -531,6 +531,14 @@ module Ci
trace.exist?
end
+ def has_live_trace?
+ trace.live_trace_exist?
+ end
+
+ def has_archived_trace?
+ trace.archived_trace_exist?
+ end
+
def artifacts_file
job_artifacts_archive&.file
end
diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb
index f80e98e5bca..e132cb045e2 100644
--- a/app/models/ci/job_artifact.rb
+++ b/app/models/ci/job_artifact.rb
@@ -176,6 +176,10 @@ module Ci
end
end
+ def self.archived_trace_exists_for?(job_id)
+ where(job_id: job_id).trace.take&.file&.file&.exists?
+ end
+
private
def file_format_adapter_class
diff --git a/app/services/ci/archive_trace_service.rb b/app/services/ci/archive_trace_service.rb
index b5cfa1d019c..700d78361a4 100644
--- a/app/services/ci/archive_trace_service.rb
+++ b/app/services/ci/archive_trace_service.rb
@@ -2,8 +2,25 @@
module Ci
class ArchiveTraceService
- def execute(job)
+ def execute(job, worker_name:)
+ # TODO: Remove this logging once we confirmed new live trace architecture is functional.
+ # See https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/4667.
+ unless job.has_live_trace?
+ Sidekiq.logger.warn(class: worker_name,
+ message: 'The job does not have live trace but going to be archived.',
+ job_id: job.id)
+ return
+ end
+
job.trace.archive!
+
+ # TODO: Remove this logging once we confirmed new live trace architecture is functional.
+ # See https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/4667.
+ unless job.has_archived_trace?
+ Sidekiq.logger.warn(class: worker_name,
+ message: 'The job does not have archived trace after archiving.',
+ job_id: job.id)
+ end
rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
# It's already archived, thus we can safely ignore this exception.
rescue => e
@@ -11,7 +28,7 @@ module Ci
# If `archive!` keeps failing for over a week, that could incur data loss.
# (See more https://docs.gitlab.com/ee/administration/job_traces.html#new-live-trace-architecture)
# In order to avoid interrupting the system, we do not raise an exception here.
- archive_error(e, job)
+ archive_error(e, job, worker_name)
end
private
@@ -22,9 +39,12 @@ module Ci
"Counter of failed attempts of trace archiving")
end
- def archive_error(error, job)
+ def archive_error(error, job, worker_name)
failed_archive_counter.increment
- Rails.logger.error "Failed to archive trace. id: #{job.id} message: #{error.message}" # rubocop:disable Gitlab/RailsLogger
+
+ Sidekiq.logger.warn(class: worker_name,
+ message: "Failed to archive trace. message: #{error.message}.",
+ job_id: job.id)
Gitlab::Sentry
.track_exception(error,
diff --git a/app/workers/archive_trace_worker.rb b/app/workers/archive_trace_worker.rb
index 4a9becf0ca7..66f9b8d9e80 100644
--- a/app/workers/archive_trace_worker.rb
+++ b/app/workers/archive_trace_worker.rb
@@ -7,7 +7,7 @@ class ArchiveTraceWorker
# rubocop: disable CodeReuse/ActiveRecord
def perform(job_id)
Ci::Build.without_archived_trace.find_by(id: job_id).try do |job|
- Ci::ArchiveTraceService.new.execute(job)
+ Ci::ArchiveTraceService.new.execute(job, worker_name: self.class.name)
end
end
# rubocop: enable CodeReuse/ActiveRecord
diff --git a/app/workers/ci/archive_traces_cron_worker.rb b/app/workers/ci/archive_traces_cron_worker.rb
index f65ff239866..75e68d0233a 100644
--- a/app/workers/ci/archive_traces_cron_worker.rb
+++ b/app/workers/ci/archive_traces_cron_worker.rb
@@ -11,7 +11,7 @@ module Ci
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
- Ci::ArchiveTraceService.new.execute(build)
+ Ci::ArchiveTraceService.new.execute(build, worker_name: self.class.name)
end
end
# rubocop: enable CodeReuse/ActiveRecord
diff --git a/changelogs/unreleased/safe-archiving-for-traces.yml b/changelogs/unreleased/safe-archiving-for-traces.yml
new file mode 100644
index 00000000000..2b9070bacfe
--- /dev/null
+++ b/changelogs/unreleased/safe-archiving-for-traces.yml
@@ -0,0 +1,5 @@
+---
+title: Extra logging for new live trace architecture
+merge_request: 30892
+author:
+type: fixed
diff --git a/lib/gitlab/ci/trace.rb b/lib/gitlab/ci/trace.rb
index ce5857965bf..cb617080c76 100644
--- a/lib/gitlab/ci/trace.rb
+++ b/lib/gitlab/ci/trace.rb
@@ -63,7 +63,15 @@ module Gitlab
end
def exist?
- trace_artifact&.exists? || job.trace_chunks.any? || current_path.present? || old_trace.present?
+ archived_trace_exist? || live_trace_exist?
+ end
+
+ def archived_trace_exist?
+ trace_artifact&.exists?
+ end
+
+ def live_trace_exist?
+ job.trace_chunks.any? || current_path.present? || old_trace.present?
end
def read
@@ -167,7 +175,7 @@ module Gitlab
def clone_file!(src_stream, temp_dir)
FileUtils.mkdir_p(temp_dir)
- Dir.mktmpdir('tmp-trace', temp_dir) do |dir_path|
+ Dir.mktmpdir("tmp-trace-#{job.id}", temp_dir) do |dir_path|
temp_path = File.join(dir_path, "job.log")
FileUtils.touch(temp_path)
size = IO.copy_stream(src_stream, temp_path)
diff --git a/lib/gitlab/ci/trace/chunked_io.rb b/lib/gitlab/ci/trace/chunked_io.rb
index 8c6fd56493f..e99889f4a25 100644
--- a/lib/gitlab/ci/trace/chunked_io.rb
+++ b/lib/gitlab/ci/trace/chunked_io.rb
@@ -166,6 +166,13 @@ module Gitlab
end
def destroy!
+ # TODO: Remove this logging once we confirmed new live trace architecture is functional.
+ # See https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/4667.
+ unless build.has_archived_trace?
+ Sidekiq.logger.warn(message: 'The job does not have archived trace but going to be destroyed.',
+ job_id: build.id)
+ end
+
trace_chunks.fast_destroy_all
@tell = @size = 0
ensure
diff --git a/spec/lib/gitlab/ci/trace/chunked_io_spec.rb b/spec/lib/gitlab/ci/trace/chunked_io_spec.rb
index 546a9e7d0cc..3e5cd81f929 100644
--- a/spec/lib/gitlab/ci/trace/chunked_io_spec.rb
+++ b/spec/lib/gitlab/ci/trace/chunked_io_spec.rb
@@ -442,5 +442,15 @@ describe Gitlab::Ci::Trace::ChunkedIO, :clean_gitlab_redis_cache do
expect(Ci::BuildTraceChunk.where(build: build).count).to eq(0)
end
+
+ context 'when the job does not have archived trace' do
+ it 'leaves a message in sidekiq log' do
+ expect(Sidekiq.logger).to receive(:warn).with(
+ message: 'The job does not have archived trace but going to be destroyed.',
+ job_id: build.id).and_call_original
+
+ subject
+ end
+ end
end
end
diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb
index 78862de0657..c30cb70e1c1 100644
--- a/spec/models/ci/build_spec.rb
+++ b/spec/models/ci/build_spec.rb
@@ -692,6 +692,34 @@ describe Ci::Build do
end
end
+ describe '#has_live_trace?' do
+ subject { build.has_live_trace? }
+
+ let(:build) { create(:ci_build, :trace_live) }
+
+ it { is_expected.to be_truthy }
+
+ context 'when build does not have live trace' do
+ let(:build) { create(:ci_build) }
+
+ it { is_expected.to be_falsy }
+ end
+ end
+
+ describe '#has_archived_trace?' do
+ subject { build.has_archived_trace? }
+
+ let(:build) { create(:ci_build, :trace_artifact) }
+
+ it { is_expected.to be_truthy }
+
+ context 'when build does not have archived trace' do
+ let(:build) { create(:ci_build) }
+
+ it { is_expected.to be_falsy }
+ end
+ end
+
describe '#has_job_artifacts?' do
subject { build.has_job_artifacts? }
diff --git a/spec/models/ci/job_artifact_spec.rb b/spec/models/ci/job_artifact_spec.rb
index 1ba66565e03..1413da231e0 100644
--- a/spec/models/ci/job_artifact_spec.rb
+++ b/spec/models/ci/job_artifact_spec.rb
@@ -70,6 +70,31 @@ describe Ci::JobArtifact do
end
end
+ describe '.archived_trace_exists_for?' do
+ subject { described_class.archived_trace_exists_for?(job_id) }
+
+ let!(:artifact) { create(:ci_job_artifact, :trace, job: job) }
+ let(:job) { create(:ci_build) }
+
+ context 'when the specified job_id exists' do
+ let(:job_id) { job.id }
+
+ it { is_expected.to be_truthy }
+
+ context 'when the job does have archived trace' do
+ let!(:artifact) { }
+
+ it { is_expected.to be_falsy }
+ end
+ end
+
+ context 'when the specified job_id does not exist' do
+ let(:job_id) { 10000 }
+
+ it { is_expected.to be_falsy }
+ end
+ end
+
describe 'callbacks' do
subject { create(:ci_job_artifact, :archive) }
diff --git a/spec/services/ci/archive_trace_service_spec.rb b/spec/services/ci/archive_trace_service_spec.rb
index 44a77c29086..454db3d5a48 100644
--- a/spec/services/ci/archive_trace_service_spec.rb
+++ b/spec/services/ci/archive_trace_service_spec.rb
@@ -3,7 +3,7 @@
require 'spec_helper'
describe Ci::ArchiveTraceService, '#execute' do
- subject { described_class.new.execute(job) }
+ subject { described_class.new.execute(job, worker_name: ArchiveTraceWorker.name) }
context 'when job is finished' do
let(:job) { create(:ci_build, :success, :trace_live) }
@@ -25,6 +25,34 @@ describe Ci::ArchiveTraceService, '#execute' do
expect { subject }.not_to change { Ci::JobArtifact.trace.count }
end
end
+
+ context 'when job does not have trace' do
+ let(:job) { create(:ci_build, :success) }
+
+ it 'leaves a warning message in sidekiq log' do
+ expect(Sidekiq.logger).to receive(:warn).with(
+ class: ArchiveTraceWorker.name,
+ message: 'The job does not have live trace but going to be archived.',
+ job_id: job.id)
+
+ subject
+ end
+ end
+
+ context 'when job failed to archive trace but did not raise an exception' do
+ before do
+ allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive!) {}
+ end
+
+ it 'leaves a warning message in sidekiq log' do
+ expect(Sidekiq.logger).to receive(:warn).with(
+ class: ArchiveTraceWorker.name,
+ message: 'The job does not have archived trace after archiving.',
+ job_id: job.id)
+
+ subject
+ end
+ end
end
context 'when job is running' do
@@ -37,10 +65,10 @@ describe Ci::ArchiveTraceService, '#execute' do
issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
extra: { job_id: job.id } ).once
- expect(Rails.logger)
- .to receive(:error)
- .with("Failed to archive trace. id: #{job.id} message: Job is not finished yet")
- .and_call_original
+ expect(Sidekiq.logger).to receive(:warn).with(
+ class: ArchiveTraceWorker.name,
+ message: "Failed to archive trace. message: Job is not finished yet.",
+ job_id: job.id).and_call_original
expect(Gitlab::Metrics)
.to receive(:counter)
diff --git a/spec/support/shared_examples/ci_trace_shared_examples.rb b/spec/support/shared_examples/ci_trace_shared_examples.rb
index ab0550e2613..68c2b6e10e2 100644
--- a/spec/support/shared_examples/ci_trace_shared_examples.rb
+++ b/spec/support/shared_examples/ci_trace_shared_examples.rb
@@ -720,6 +720,58 @@ shared_examples_for 'trace with enabled live trace feature' do
end
end
+ describe '#archived_trace_exist?' do
+ subject { trace.archived_trace_exist? }
+
+ context 'when trace does not exist' do
+ it { is_expected.to be_falsy }
+ end
+
+ context 'when archived trace exists' do
+ before do
+ create(:ci_job_artifact, :trace, job: build)
+ end
+
+ it { is_expected.to be_truthy }
+ end
+
+ context 'when live trace exists' do
+ before do
+ Gitlab::Ci::Trace::ChunkedIO.new(build) do |stream|
+ stream.write('abc')
+ end
+ end
+
+ it { is_expected.to be_falsy }
+ end
+ end
+
+ describe '#live_trace_exist?' do
+ subject { trace.live_trace_exist? }
+
+ context 'when trace does not exist' do
+ it { is_expected.to be_falsy }
+ end
+
+ context 'when archived trace exists' do
+ before do
+ create(:ci_job_artifact, :trace, job: build)
+ end
+
+ it { is_expected.to be_falsy }
+ end
+
+ context 'when live trace exists' do
+ before do
+ Gitlab::Ci::Trace::ChunkedIO.new(build) do |stream|
+ stream.write('abc')
+ end
+ end
+
+ it { is_expected.to be_truthy }
+ end
+ end
+
describe '#archive!' do
subject { trace.archive! }
diff --git a/spec/workers/archive_trace_worker_spec.rb b/spec/workers/archive_trace_worker_spec.rb
index 368ed3f3db1..44f7be15201 100644
--- a/spec/workers/archive_trace_worker_spec.rb
+++ b/spec/workers/archive_trace_worker_spec.rb
@@ -11,7 +11,7 @@ describe ArchiveTraceWorker do
it 'executes service' do
expect_any_instance_of(Ci::ArchiveTraceService)
- .to receive(:execute).with(job)
+ .to receive(:execute).with(job, anything)
subject
end
diff --git a/spec/workers/ci/archive_traces_cron_worker_spec.rb b/spec/workers/ci/archive_traces_cron_worker_spec.rb
index eca6cf5235f..28381fdc3be 100644
--- a/spec/workers/ci/archive_traces_cron_worker_spec.rb
+++ b/spec/workers/ci/archive_traces_cron_worker_spec.rb
@@ -34,7 +34,7 @@ describe Ci::ArchiveTracesCronWorker do
it 'executes service' do
expect_any_instance_of(Ci::ArchiveTraceService)
- .to receive(:execute).with(build)
+ .to receive(:execute).with(build, anything)
subject
end
@@ -60,7 +60,10 @@ describe Ci::ArchiveTracesCronWorker do
end
it 'puts a log' do
- expect(Rails.logger).to receive(:error).with("Failed to archive trace. id: #{build.id} message: Unexpected error")
+ expect(Sidekiq.logger).to receive(:warn).with(
+ class: described_class.name,
+ message: "Failed to archive trace. message: Unexpected error.",
+ job_id: build.id)
subject
end
diff --git a/spec/workers/stuck_ci_jobs_worker_spec.rb b/spec/workers/stuck_ci_jobs_worker_spec.rb
index 72de62f1188..c3d577e2dae 100644
--- a/spec/workers/stuck_ci_jobs_worker_spec.rb
+++ b/spec/workers/stuck_ci_jobs_worker_spec.rb
@@ -7,8 +7,6 @@ describe StuckCiJobsWorker do
let!(:runner) { create :ci_runner }
let!(:job) { create :ci_build, runner: runner }
- let(:trace_lease_key) { "trace:write:lock:#{job.id}" }
- let(:trace_lease_uuid) { SecureRandom.uuid }
let(:worker_lease_key) { StuckCiJobsWorker::EXCLUSIVE_LEASE_KEY }
let(:worker_lease_uuid) { SecureRandom.uuid }
@@ -16,7 +14,6 @@ describe StuckCiJobsWorker do
before do
stub_exclusive_lease(worker_lease_key, worker_lease_uuid)
- stub_exclusive_lease(trace_lease_key, trace_lease_uuid)
job.update!(status: status, updated_at: updated_at)
end
@@ -195,7 +192,6 @@ describe StuckCiJobsWorker do
end
it 'cancels exclusive leases after worker perform' do
- expect_to_cancel_exclusive_lease(trace_lease_key, trace_lease_uuid)
expect_to_cancel_exclusive_lease(worker_lease_key, worker_lease_uuid)
worker.perform