summaryrefslogtreecommitdiff
path: root/util/temp_metrics.conf
blob: 6a2aa3fc30abd115aee4b2121c933956f7784844 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

description     "Temporary, quick-hack metrics collection & thermal daemon"
author          "chromium-os-dev@chromium.org"

# This is for quickly adding UMA stats that we may need for
# short-term experiments, when we don't have the time to add
# stuff to metrics_daemon.  That's where it should go in the
# long term.
#
# This is also currently doing a userland thermal loop to allow
# for quick experimentation.  This thermal loop will eventually
# move to the BIOS once the data from experiments help prove its
# efficacy.

start on started system-services
stop on stopping system-services
respawn

script
  TEMP_OFFSET=273  # difference between K (reported by EC) and C (used in UMA)
  PECI_TEMP_THRESHOLD=77  # CPU temperature threshold

  # Thermal loop fields
  CPU_MAX_FREQ_FIELD=1
  CPU_MIN_FREQ_FIELD=2
  GPU_MAX_FREQ_FIELD=3
  CPU_DUTY_CYCLE_FIELD=4
  PKG_POWER_LIMIT_FIELD=5

  # Thermal loop steps
  all_steps="
  1801000 800000 1150 0 0x180aa00dd8088 # no throttling
  1801000 800000 1150 0 0x180aa00dd8080 # cap pkg to 16W
  1801000 800000 1150 0 0x180aa00dd8078 # cap pkg to 15W
  1801000 800000 1150 0 0x180aa00dd8070 # cap pkg to 14W
  1801000 800000 1150 0 0x180aa00dd8068 # cap pkg to 13W
  1800000 800000 900 0 0x180aa00dd8068 # disable turbo
  1600000 800000 800 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1400000 800000 700 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1200000 800000 600 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1000000 800000 500 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 400 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 350 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 350 0x1c 0x180aa00dd8068 # duty cycle CPU
  800000 800000 350 0x18 0x180aa00dd8068 # duty cycle CPU
  "
  max_steps=$(($(echo "$all_steps" | wc -l) - 3))

  get_step() {
    row=$(($1 + 2))
    out=$(echo "$all_steps" | awk "{if (NR==$row) print}")
    echo "$out"
  }

  get_field() {
    #out=$(echo "$2" | awk -v z=$1 '{print $z}')
    out=$(echo "$2" | awk "{print \$$1}")
    echo $out
  }

  get_peci_temp() {
    tempk=$(ectool temps 9 | sed 's/[^0-9]//g')
    tempc=$((tempk - $TEMP_OFFSET))
    echo $tempc
  }

  set_cpu_freq() {
    max_freq=$1
    min_freq=$2
    for cpu in /sys/devices/system/cpu/cpu?/cpufreq; do
      echo 800000 > $cpu/scaling_min_freq
      echo 800000 > $cpu/scaling_max_freq
      echo $max_freq > $cpu/scaling_max_freq
      echo $min_freq > $cpu/scaling_min_freq
    done
  }

  set_gpu_freq() {
    gpu_freq=$1
    echo $gpu_freq > /sys/kernel/debug/dri/0/i915_max_freq
  }

  set_duty_cycle() {
    duty_cycle=$1
    for i in 0 1 2 3; do
      wrmsr $i 0x19a $duty_cycle
    done
  }

  set_pkg_power_limit() {
    pwr_limit=$1
    wrmsr 0 0x610 $pwr_limit
  }

  disable_external_prochot() {
    wrmsr 0 0x1fc 0x000000000004005e
  }

  current_step=1
  new_step=0

  thermal_loop() {
    peci_temp=$1
    if [ $peci_temp -gt $PECI_TEMP_THRESHOLD ]; then
      if [ $current_step -ne $max_steps ]; then
        new_step=$(($current_step + 1))
      fi
    elif [ $peci_temp -lt $PECI_TEMP_THRESHOLD ]; then
      if [ $current_step -gt 0 ]; then
        new_step=$(($current_step - 1))
      fi
    else
      new_step=$current_step
    fi

    if  [ $new_step -eq $current_step ]; then
      return
    fi

    current_step=$new_step
    step=$(get_step $new_step)

    cpu_max_freq=$(get_field $CPU_MAX_FREQ_FIELD "$step")
    cpu_min_freq=$(get_field $CPU_MIN_FREQ_FIELD "$step")
    gpu_max_freq=$(get_field $GPU_MAX_FREQ_FIELD "$step")
    cpu_duty_cycle=$(get_field $CPU_DUTY_CYCLE_FIELD "$step")
    pkg_power_limit=$(get_field $PKG_POWER_LIMIT_FIELD "$step")

    set_cpu_freq $cpu_max_freq $cpu_min_freq
    set_gpu_freq $gpu_max_freq
    set_duty_cycle $cpu_duty_cycle
    set_pkg_power_limit $pkg_power_limit
  }

  fan_reset_thresholds() {
    temp_low1=105
    temp_low2=105
    temp_low3=105
    temp_low4=105
    temp_low5=105
  }

  last_rpm=10
  temp_low1=105
  temp_low2=105
  temp_low3=105
  temp_low4=105
  temp_low5=105

  fan_loop() {
    peci_temp=$1

    if [ $peci_temp -gt 77 ] || [ $peci_temp -gt $temp_low1 ]; then
      rpm=9300
      fan_reset_thresholds
      temp_low1=68
    elif [ $peci_temp -gt 68 ] || [ $peci_temp -gt $temp_low2 ]; then
      rpm=7725
      fan_reset_thresholds
      temp_low2=61
    elif [ $peci_temp -gt 61 ] || [ $peci_temp -gt $temp_low3 ]; then
      rpm=6150
      fan_reset_thresholds
      temp_low3=54
    elif [ $peci_temp -gt 54 ] || [ $peci_temp -gt $temp_low4 ]; then
      rpm=4575
      fan_reset_thresholds
      temp_low4=47
    elif [ $peci_temp -gt 47 ] || [ $peci_temp -gt $temp_low5 ]; then
      rpm=3000
      fan_reset_thresholds
      temp_low5=41
    else
      rpm=0
      fan_reset_thresholds
    fi

    if [ $last_rpm -eq $rpm ]; then
      return
    fi

    last_rpm=$rpm
    ectool pwmsetfanrpm $rpm
  }

  # External prochot misfires occasionally. Disable it.
  disable_external_prochot

  loop_count=0
  while true; do
    sleep 1
    loop_count=$(($loop_count + 1))

    # read the CPU temperature
    cpu_temp=$(get_peci_temp)

    # run the fan loop once a second
    fan_loop $cpu_temp

    # run the thermal loop once a second
    thermal_loop $cpu_temp

    # report the metrics once every 30 seconds
    if [ $loop_count -lt 30 ]; then
      continue
    fi
    loop_count=0

    ectool temps all | while read line; do
      index=$(printf "%02d" "${line%%:*}")
      tempk="${line##* }"
      tempc=$(($tempk - $TEMP_OFFSET))
      # ignore values below freezing
      if [ $tempc -lt 0 ]; then
        tempc=0
      fi
      # Use a linear histogram with 1 C buckets starting at 0.
      N_SLOTS=180
      metrics_client -e Platform.Temperature.Sensor$index $tempc $N_SLOTS
    done
  done
end script