summaryrefslogtreecommitdiff
path: root/util/temp_metrics.conf
blob: fc35e08ef5d81d91850772bf4c3a0adc4a9b3d6c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

description     "Temporary, quick-hack metrics collection & thermal daemon"
author          "chromium-os-dev@chromium.org"

# This is for quickly adding UMA stats that we may need for
# short-term experiments, when we don't have the time to add
# stuff to metrics_daemon.  That's where it should go in the
# long term.
#
# This is also currently doing a userland thermal loop to allow
# for quick experimentation.  This thermal loop will eventually
# move to the BIOS once the data from experiments help prove its
# efficacy.

# Job lifecycle: start once the system-services job has started, and stop
# as soon as system-services begins stopping.
start on started system-services
stop on stopping system-services
# Ask Upstart to restart the job if the script below exits.
respawn

script
  # Kelvin-to-Celsius offset: the EC reports Kelvin, UMA wants Celsius.
  TEMP_OFFSET=273

  # 1-based column numbers of the settings stored in each $all_steps row.
  CPU_MAX_FREQ_FIELD=1
  CPU_MIN_FREQ_FIELD=2
  GPU_MAX_FREQ_FIELD=3
  CPU_DUTY_CYCLE_FIELD=4
  PKG_POWER_LIMIT_FIELD=5

  # Throttling table: one row per thermal step, mildest first.  Columns are
  # CPU max freq, CPU min freq, GPU max freq, CPU duty cycle and package
  # power limit.  The trailing "# ..." text is part of the string (it is not
  # a shell comment); only columns 1-5 are read back by the thermal loop.
  # The string starts with a newline, so step N lives on line N + 2.
  all_steps="
  1801000 800000 1150 0 0x180aa00dd8088 # no throttling
  1801000 800000 1150 0 0x180aa00dd8080 # cap pkg to 16W
  1801000 800000 1150 0 0x180aa00dd8078 # cap pkg to 15W
  1801000 800000 1150 0 0x180aa00dd8070 # cap pkg to 14W
  1801000 800000 1150 0 0x180aa00dd8068 # cap pkg to 13W
  1800000 800000 900 0 0x180aa00dd8068 # disable turbo
  1600000 800000 800 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1400000 800000 700 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1200000 800000 600 0 0x180aa00dd8068 # cap CPU & GPU frequency
  1000000 800000 500 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 400 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 350 0 0x180aa00dd8068 # cap CPU & GPU frequency
  800000 800000 350 0x1c 0x180aa00dd8068 # duty cycle CPU
  800000 800000 350 0x18 0x180aa00dd8068 # duty cycle CPU
  "

  # Index of the last step.  The echoed table has one leading blank line and
  # one trailing whitespace-only line, and steps are numbered from zero,
  # hence the "- 3".
  max_steps=$(echo "$all_steps" | wc -l)
  max_steps=$((max_steps - 3))

  # Print the $all_steps row for zero-based step number $1.
  # The table begins with a blank line, so step N is input line N + 2.
  # An empty line is printed when the table has no such row.
  get_step() {
    echo "$all_steps" |
      awk -v row="$(($1 + 2))" 'NR == row { line = $0 } END { print line }'
  }

  # Print whitespace-separated column $1 (1-based) of the string $2.
  # A column past the end of the row yields an empty line.
  get_field() {
    field=$(printf '%s\n' "$2" | awk -v col="$1" '{ print $col }')
    echo $field
  }

  # Print the reading of EC temperature sensor 9 (the PECI CPU sensor) in
  # degrees Celsius.  Every non-digit character of the ectool output is
  # discarded and the remaining digits are taken as Kelvin.
  get_peci_temp() {
    tempk=$(ectool temps 9 | sed 's/[^0-9]//g')
    printf '%s\n' "$((tempk - ${TEMP_OFFSET}))"
  }

  # Print the reading of EC temperature sensor $1 in degrees Celsius.
  # Every non-digit character of the ectool output is discarded and the
  # remaining digits are taken as Kelvin; with no digits at all the result
  # is -TEMP_OFFSET.
  get_sensor_temp() {
    tempk=$(ectool temps "$1" | sed 's/[^0-9]//g')
    printf '%s\n' "$((tempk - ${TEMP_OFFSET}))"
  }

  # Print the space-separated EC sensor ids used as skin-temperature sources:
  # USB C-Object, PCH D-Object and Charger D-Object, in that order.
  #
  # Sensor map:
  #   USB C-Object:     1 or 13
  #   PCH D-Object:     3
  #   Hinge C-Object:   5 or 15  (listed for reference; not returned)
  #   Charger D-Object: 7
  get_sensor_list() {
    # The USB sensor is id 1 when "ectool tempsinfo 1" says so, else id 13.
    case $(ectool tempsinfo 1) in
      *"USB C-Object"*) usb_id=1 ;;
      *) usb_id=13 ;;
    esac
    printf '%s %s %s\n' "$usb_id" 3 7
  }

  # Results of the most recent get_max_skin_temp call:
  #   max_skin_temp       - hottest reading among the requested sensors (C)
  #   sensor_temperatures - "id:temp:" pairs, used in log messages
  max_skin_temp=0
  sensor_temperatures=""

  # Read every sensor id given as an argument and publish the results in the
  # two globals above.  max_skin_temp stays 0 unless some reading is above 0.
  # Sensor 9 (PECI CPU) is appended to sensor_temperatures afterwards but
  # does not take part in the maximum.
  get_max_skin_temp() {
    max_skin_temp=0
    sensor_temperatures=""
    for sensor_id in "$@"; do
      reading=$(get_sensor_temp "$sensor_id")
      sensor_temperatures="${sensor_temperatures}${sensor_id}:${reading}:"
      if [ "$reading" -gt "$max_skin_temp" ]; then
        max_skin_temp=$reading
      fi
    done

    # Record the PECI CPU temperature also.
    reading=$(get_sensor_temp 9)
    sensor_temperatures="${sensor_temperatures}9:${reading}:"
  }

  # Set the cpufreq scaling limits of every cpu0..cpu9 to max=$1, min=$2.
  set_cpu_freq() {
    for policy in /sys/devices/system/cpu/cpu?/cpufreq; do
      # Both limits are first written as 800000 before the requested values
      # go in (max, then min).  Presumably this keeps min <= max at every
      # intermediate write - TODO confirm; the order is kept as is.
      echo 800000 > "$policy/scaling_min_freq"
      echo 800000 > "$policy/scaling_max_freq"
      echo "$1" > "$policy/scaling_max_freq"
      echo "$2" > "$policy/scaling_min_freq"
    done
  }

  # Write $1 to the i915 debugfs max-frequency control of DRM device 0.
  set_gpu_freq() {
    echo "$1" > /sys/kernel/debug/dri/0/i915_max_freq
  }

  # Write duty-cycle value $1 to MSR 0x19a on each of CPUs 0-3.
  # NOTE(review): 0x19a looks like IA32_CLOCK_MODULATION - confirm in the SDM.
  set_duty_cycle() {
    for core in 0 1 2 3; do
      wrmsr "$core" 0x19a "$1"
    done
  }

  # Write package power limit value $1 to MSR 0x610 on CPU 0.
  # NOTE(review): 0x610 looks like MSR_PKG_POWER_LIMIT - confirm in the SDM.
  set_pkg_power_limit() {
    wrmsr 0 0x610 "$1"
  }

  # Disable the external PROCHOT input by writing a fixed value to MSR 0x1fc.
  # The first wrmsr argument appears to be the CPU index (set_duty_cycle
  # iterates it over 0-3), so this touches CPU 0 only.
  # NOTE(review): which bits of the constant do the disabling is not visible
  # here - confirm against the Intel SDM before changing the value.
  disable_external_prochot() {
    wrmsr 0 0x1fc 0x000000000004005e
  }

  # Set the TCC offset to 0 by writing 0 to MSR 0x1a2 (first wrmsr argument
  # 0, presumably the CPU index).
  # NOTE(review): this zeroes the whole register, not just an offset field -
  # confirm that is intended.
  disable_tcc_offset() {
    wrmsr 0 0x1a2 0
  }

  # Log a message through logger(1) under the tag "temp_metrics".
  # All arguments are joined with single spaces into one message string.
  log_message() {
    # "$*" (not "$@") so that logger always receives exactly one message
    # argument, even when the caller passes several words or none.
    logger -t temp_metrics "$*"
  }

  # Thermal-loop bands, keyed on max skin temperature in degrees C.
  # A band applies when the temperature is strictly above TEMP_THRESHOLD_n
  # (the highest such band wins).  Inside a band the loop steps up while the
  # temperature is above the watermark (_WM) and down while it is below,
  # always staying within [_MIN_STEP, _MAX_STEP] of the step table.

  # Band 0: at or below TEMP_THRESHOLD_1 - no throttling.
  TEMP_THRESHOLD_0_MIN_STEP=0
  TEMP_THRESHOLD_0_MAX_STEP=0

  # Band 1.
  TEMP_THRESHOLD_1=38
  TEMP_THRESHOLD_1_WM=40
  TEMP_THRESHOLD_1_MIN_STEP=1
  TEMP_THRESHOLD_1_MAX_STEP=5

  # Band 2.
  TEMP_THRESHOLD_2=45
  TEMP_THRESHOLD_2_WM=47
  TEMP_THRESHOLD_2_MIN_STEP=6
  TEMP_THRESHOLD_2_MAX_STEP=9

  # Band 3.  The watermark equals the entry threshold, so any temperature
  # that selects this band is already above the watermark.
  TEMP_THRESHOLD_3=50
  TEMP_THRESHOLD_3_WM=50
  TEMP_THRESHOLD_3_MIN_STEP=10
  TEMP_THRESHOLD_3_MAX_STEP=13

  # Loop state carried between thermal_loop calls: the step the loop believes
  # is in effect, and the step it computed most recently.
  current_step=1
  new_step=0

  # Run one pass of the userland throttling loop.
  #   $1 - max skin temperature in degrees C.
  # Globals: reads the TEMP_THRESHOLD_* settings and sensor_temperatures;
  #          reads and updates current_step and new_step.
  #
  # The temperature selects a band; within the band the step moves by at
  # most one per call, then is clamped into the band's step range (so a band
  # change can jump several steps at once).  Hardware is only reprogrammed
  # when the resulting step differs from current_step.
  thermal_loop() {
    # Hack to reset turbo activation threshold since BIOS can change it
    # underneath us.
    wrmsr 0 0x64c 0x12

    skin_c=$1

    # Band selection: start with band 0 and let every band whose threshold
    # is exceeded override it in ascending order, so the highest one wins.
    watermark=0
    lowest=$TEMP_THRESHOLD_0_MIN_STEP
    highest=$TEMP_THRESHOLD_0_MAX_STEP
    if [ "$skin_c" -gt "$TEMP_THRESHOLD_1" ]; then
      watermark=$TEMP_THRESHOLD_1_WM
      lowest=$TEMP_THRESHOLD_1_MIN_STEP
      highest=$TEMP_THRESHOLD_1_MAX_STEP
    fi
    if [ "$skin_c" -gt "$TEMP_THRESHOLD_2" ]; then
      watermark=$TEMP_THRESHOLD_2_WM
      lowest=$TEMP_THRESHOLD_2_MIN_STEP
      highest=$TEMP_THRESHOLD_2_MAX_STEP
    fi
    if [ "$skin_c" -gt "$TEMP_THRESHOLD_3" ]; then
      watermark=$TEMP_THRESHOLD_3_WM
      lowest=$TEMP_THRESHOLD_3_MIN_STEP
      highest=$TEMP_THRESHOLD_3_MAX_STEP
    fi

    # Move one step toward the watermark.  When the step is already at the
    # band limit, new_step deliberately keeps its value from the last call.
    if [ "$skin_c" -gt "$watermark" ]; then
      if [ "$current_step" -ne "$highest" ]; then
        new_step=$((current_step + 1))
      fi
    elif [ "$skin_c" -lt "$watermark" ]; then
      if [ "$current_step" -gt "$lowest" ]; then
        new_step=$((current_step - 1))
      fi
    else
      new_step=$current_step
    fi

    # Clamp into the selected band.
    if [ "$new_step" -gt "$highest" ]; then
      new_step=$highest
    elif [ "$new_step" -lt "$lowest" ]; then
      new_step=$lowest
    fi

    # Nothing to reprogram.
    if [ "$new_step" -eq "$current_step" ]; then
      return
    fi

    current_step=$new_step
    step_row=$(get_step "$new_step")

    # $step_row is left unquoted so the row is logged with single spaces.
    log_message "Throttling (temps: $sensor_temperatures):" $step_row

    cpu_max_freq=$(get_field "$CPU_MAX_FREQ_FIELD" "$step_row")
    cpu_min_freq=$(get_field "$CPU_MIN_FREQ_FIELD" "$step_row")
    gpu_max_freq=$(get_field "$GPU_MAX_FREQ_FIELD" "$step_row")
    cpu_duty_cycle=$(get_field "$CPU_DUTY_CYCLE_FIELD" "$step_row")
    pkg_power_limit=$(get_field "$PKG_POWER_LIMIT_FIELD" "$step_row")

    set_cpu_freq "$cpu_max_freq" "$cpu_min_freq"
    set_gpu_freq "$gpu_max_freq"
    set_duty_cycle "$cpu_duty_cycle"
    set_pkg_power_limit "$pkg_power_limit"
  }

  # Value a fan hysteresis threshold holds while it is disarmed.  fan_loop
  # only tests "skin_temp -gt temp_lowN" after "skin_temp -gt 48" has
  # failed, so a threshold at this value can never decide the outcome.
  FAN_THRESHOLD_DISARMED=105

  # Disarm all six fan hysteresis thresholds (temp_low1 .. temp_low6).
  fan_reset_thresholds() {
    temp_low1=$FAN_THRESHOLD_DISARMED
    temp_low2=$FAN_THRESHOLD_DISARMED
    temp_low3=$FAN_THRESHOLD_DISARMED
    temp_low4=$FAN_THRESHOLD_DISARMED
    temp_low5=$FAN_THRESHOLD_DISARMED
    temp_low6=$FAN_THRESHOLD_DISARMED
  }

  # Initial fan state.  last_rpm starts at a value fan_loop never selects,
  # so the first fan_loop call always programs the fan.
  last_rpm=10
  fan_reset_thresholds

  # Pick the fan speed for max skin temperature $1 (degrees C) and program
  # it through ectool when it differs from the last speed set.
  # Globals: reads and updates last_rpm and temp_low1..temp_low6; reads
  #          sensor_temperatures for the log message.
  #
  # Each level is entered above a fixed temperature and, once active, is
  # held until the temperature falls to that level's lower threshold
  # (temp_lowN), which gives the loop its hysteresis.
  fan_loop() {
    skin_temp=$1

    # Walk the levels from hottest to coolest; the first hit wins.
    if [ "$skin_temp" -gt 48 ] || [ "$skin_temp" -gt "$temp_low1" ]; then
      rpm=9300
      fan_level=1
    elif [ "$skin_temp" -gt 44 ] || [ "$skin_temp" -gt "$temp_low2" ]; then
      rpm=8000
      fan_level=2
    elif [ "$skin_temp" -gt 42 ] || [ "$skin_temp" -gt "$temp_low3" ]; then
      rpm=7000
      fan_level=3
    elif [ "$skin_temp" -gt 40 ] || [ "$skin_temp" -gt "$temp_low4" ]; then
      rpm=5500
      fan_level=4
    elif [ "$skin_temp" -gt 38 ] || [ "$skin_temp" -gt "$temp_low5" ]; then
      rpm=4000
      fan_level=5
    elif [ "$skin_temp" -gt 33 ] || [ "$skin_temp" -gt "$temp_low6" ]; then
      rpm=3000
      fan_level=6
    else
      rpm=0
      fan_level=0
    fi

    # Disarm every threshold, then arm only the one belonging to the level
    # just chosen (none when the fan is off).
    fan_reset_thresholds
    case $fan_level in
      1) temp_low1=46 ;;
      2) temp_low2=43 ;;
      3) temp_low3=41 ;;
      4) temp_low4=39 ;;
      5) temp_low5=34 ;;
      6) temp_low6=30 ;;
    esac

    # Speed unchanged: leave the fan alone.
    if [ "$last_rpm" -eq "$rpm" ]; then
      return
    fi

    log_message "Setting fan RPM (temps: $sensor_temperatures): $last_rpm -> $rpm"

    last_rpm=$rpm
    ectool pwmsetfanrpm "$rpm"
  }

  # External prochot misfires occasionally. Disable it.
  disable_external_prochot

  # Set TCC offset to 0.
  disable_tcc_offset

  # Get list of sensors to monitor.
  sensor_list=$(get_sensor_list)

  # Use a linear histogram with 1 C buckets starting at 0.
  N_SLOTS=180

  # Main loop: every 10 seconds read the skin sensors and run the fan and
  # thermal loops; every third pass also report all EC sensors to UMA.
  loop_count=0
  while true; do
    sleep 10
    loop_count=$((loop_count + 1))

    # Read the max skin temperature.  $sensor_list is left unquoted on
    # purpose: it is a space-separated list of sensor ids.
    get_max_skin_temp $sensor_list

    if [ "$max_skin_temp" -eq 0 ]; then
      # TODO (snanda): use PECI temperature as a fallback.
      # NOTE(review): the loops below still run with 0, which selects fan
      # RPM 0 and thermal band 0 - confirm that is the wanted failure mode.
      log_message "Invalid max skin temp"
    fi

    # Run the fan loop every 10 seconds.
    fan_loop "$max_skin_temp"

    # Run the thermal loop every 10 seconds.
    thermal_loop "$max_skin_temp"

    # Report the metrics once every 30 seconds.
    if [ "$loop_count" -lt 3 ]; then
      continue
    fi
    loop_count=0

    # Each line is expected to look like "<index>: <kelvin>".
    ectool temps all | while read -r line; do
      index=${line%%:*}
      tempk=${line##* }

      # Skip blank or malformed lines.  Otherwise printf or $(( )) below
      # would fail and, since Upstart runs this script with "sh -e", take
      # the whole job down.
      case $index in
        '' | *[!0-9]*) continue ;;
      esac
      case $tempk in
        '' | *[!0-9]*) continue ;;
      esac

      index=$(printf "%02d" "$index")
      tempc=$((tempk - TEMP_OFFSET))
      # ignore values below freezing
      if [ "$tempc" -lt 0 ]; then
        tempc=0
      fi
      metrics_client -e "Platform.Temperature.Sensor$index" "$tempc" "$N_SLOTS"
    done
  done
end script