summaryrefslogtreecommitdiff
path: root/util/temp_metrics.conf
diff options
context:
space:
mode:
authorJack Rosenthal <jrosenth@chromium.org>2021-11-04 12:11:58 -0600
committerCommit Bot <commit-bot@chromium.org>2021-11-05 04:22:34 +0000
commit252457d4b21f46889eebad61d4c0a65331919cec (patch)
tree01856c4d31d710b20e85a74c8d7b5836e35c3b98 /util/temp_metrics.conf
parent08f5a1e6fc2c9467230444ac9b582dcf4d9f0068 (diff)
downloadchrome-ec-stabilize-14589.B-ish.tar.gz
In the interest of making long-term branch maintenance incur as little technical debt on us as possible, we should not maintain any files on the branch we are not actually using. This has the added effect of making it extremely clear when merging CLs from the main branch when changes have the possibility to affect us. The follow-on CL adds a convenience script to actually pull updates from the main branch and generate a CL for the update. BUG=b:204206272 BRANCH=ish TEST=make BOARD=arcada_ish && make BOARD=drallion_ish Signed-off-by: Jack Rosenthal <jrosenth@chromium.org> Change-Id: I17e4694c38219b5a0823e0a3e55a28d1348f4b18 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/3262038 Reviewed-by: Jett Rink <jettrink@chromium.org> Reviewed-by: Tom Hughes <tomhughes@chromium.org>
Diffstat (limited to 'util/temp_metrics.conf')
-rw-r--r--util/temp_metrics.conf396
1 files changed, 0 insertions, 396 deletions
diff --git a/util/temp_metrics.conf b/util/temp_metrics.conf
deleted file mode 100644
index ccd3254beb..0000000000
--- a/util/temp_metrics.conf
+++ /dev/null
@@ -1,396 +0,0 @@
-# Copyright 2012 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-description "Temporary, quick-hack metrics collection & thermal daemon"
-author "chromium-os-dev@chromium.org"
-
-# This is for quickly adding UMA stats that we may need for
-# short-term experiments, when we don't have the time to add
-# stuff to metrics_daemon. That's where it should go in the
-# long term.
-#
-# This is also currently doing a userland thermal loop to allow
-# for quick experimentation. This thermal loop will eventually
-# move to the BIOS once the data from experiments help prove its
-# efficacy.
-
-start on started system-services
-stop on stopping system-services
-respawn
-
-script
- TEMP_OFFSET=273 # difference between K (reported by EC) and C (used in UMA)
-
- # Thermal loop fields: 1-based column numbers of each all_steps row, as passed to get_field.
- CPU_MAX_FREQ_FIELD=1
- CPU_MIN_FREQ_FIELD=2
- GPU_MAX_FREQ_FIELD=3
- CPU_DUTY_CYCLE_FIELD=4
- PKG_POWER_LIMIT_FIELD=5
-
- # Thermal loop steps, one row per step, step 0 first. The "# ..." text in each row is part of the string (not a shell comment) and shows up in the throttling log line.
- all_steps="
- 1801000 800000 1150 0 0x180aa00dd8088 # no throttling
- 1801000 800000 1150 0 0x180aa00dd8080 # cap pkg to 16W
- 1801000 800000 1150 0 0x180aa00dd8078 # cap pkg to 15W
- 1801000 800000 1150 0 0x180aa00dd8070 # cap pkg to 14W
- 1801000 800000 1150 0 0x180aa00dd8068 # cap pkg to 13W
- 1800000 800000 900 0 0x180aa00dd8068 # disable turbo
- 1600000 800000 800 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 1400000 800000 700 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 1200000 800000 600 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 1000000 800000 500 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 800000 800000 400 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 800000 800000 350 0 0x180aa00dd8068 # cap CPU & GPU frequency
- 800000 800000 350 0x1c 0x180aa00dd8068 # duty cycle CPU
- 800000 800000 350 0x18 0x180aa00dd8068 # duty cycle CPU
- "
- max_steps=$(($(echo "$all_steps" | wc -l) - 3)) # index of the last step: discounts the empty first line and the closing-quote line, then makes the count 0-based; not read anywhere else in this script
-
- # Print the row of $all_steps that describes the zero-based step index $1.
- # The table text begins with an empty line (the newline right after the
- # opening quote), so step 0 is awk record 2.
- # The row number is handed to awk with -v instead of being spliced into the
- # awk program text.
- get_step() {
-   echo "$all_steps" | awk -v row="$(($1 + 2))" 'NR == row'
- }
-
- # Print whitespace-separated field number $1 of the single-line text $2.
- # The field number is handed to awk with -v instead of being spliced into
- # the awk program text, and the result is no longer re-split by an unquoted
- # echo.
- get_field() {
-   echo "$2" | awk -v field="$1" '{ print $field }'
- }
-
- # Print the reading of EC sensor 9 in degrees C: every non-digit is removed
- # from the "ectool temps 9" output and TEMP_OFFSET is subtracted from what
- # remains.
- get_peci_temp() {
-   kelvin=$(ectool temps 9 | sed 's/[^0-9]//g')
-   echo $((kelvin - $TEMP_OFFSET))
- }
-
- # Print the temperature of EC sensor $1 in degrees C, or 0 when
- # "ectool temps" exits with a failure for that sensor.
- get_sensor_temp() {
-   if ! reading=$(ectool temps $1); then
-     echo 0
-     return
-   fi
-   # Keep only the digits of the output, then convert from Kelvin.
-   kelvin=$(echo $reading | sed 's/[^0-9]//g')
-   echo $((kelvin - $TEMP_OFFSET))
- }
-
- # Print the EC sensor numbers to monitor, separated by a space: the
- # USB C-Object sensor followed by the Charger D-Object sensor.
- get_sensor_list() {
-   # Sensor numbering:
-   #   USB C-Object:     1 or 13
-   #   PCH D-Object:     3
-   #   Hinge C-Object:   5 or 15
-   #   Charger D-Object: 7
-   # Only the USB and charger sensors are reported.
-   charger_sensor=7
-   usb_sensor=13
-   if ectool tempsinfo 1 | grep -q "USB C-Object"; then
-     usb_sensor=1
-   fi
-   echo $usb_sensor $charger_sensor
- }
-
- # Send the calibration coefficients (S0 B0 B1 B2) of the four tmp006
- # sensors to the EC with "ectool tmp006cal".
- set_calibration_data() {
-   # B0, B1 and B2 are the same for every sensor; only S0 differs.
-   b_coeffs='-2.94e-5 -5.7e-7 4.63e-9'
-
-   # Each entry is "<tmp006 index> <S0>", in the order USB C, PCH D, Hinge C,
-   # Charger D. Note that the sensor numbering is different between the
-   # ectool tmp006 and temps/tempsinfo commands.
-   for cal in \
-     '0 2.712e-14' \
-     '1 9.301e-14' \
-     '2 -11.000e-14' \
-     '3 5.141e-14'; do
-     # Add "--" otherwise ectool will barf when trying to parse negative
-     # coefficients. The expansions are left unquoted on purpose so that
-     # every number becomes its own argument.
-     ectool tmp006cal -- $cal $b_coeffs
-   done
- }
-
- sensor_temperatures=
- max_skin_temp=0
-
- # Read every sensor number passed as an argument and publish two globals:
- #   max_skin_temp       - highest reading in degrees C; stays 0 when no
- #                         reading is above 0
- #   sensor_temperatures - "<sensor>:<temp>:" entries for the log, with the
- #                         PECI CPU sensor (9) appended; sensor 9 never
- #                         contributes to max_skin_temp
- get_max_skin_temp() {
-   max_skin_temp=0
-   sensor_temperatures=
-   for i in $*; do
-     t=$(get_sensor_temp $i)
-     sensor_temperatures="${sensor_temperatures}${i}:${t}:"
-     if [ $max_skin_temp -lt $t ]; then
-       max_skin_temp=$t
-     fi
-   done
-
-   # Record the PECI CPU temperature also.
-   sensor_temperatures="${sensor_temperatures}9:$(get_sensor_temp 9):"
- }
-
- set_cpu_freq() { # Write max ($1) and min ($2) scaling frequencies to every cpufreq directory matched below.
- max_freq=$1
- min_freq=$2
- for cpu in /sys/devices/system/cpu/cpu?/cpufreq; do # "cpu?" matches only CPU names with a one-character suffix
- echo 800000 > $cpu/scaling_min_freq # both limits are first set to 800000; presumably so the two writes below never ask for min > max -- TODO confirm
- echo 800000 > $cpu/scaling_max_freq
- echo $max_freq > $cpu/scaling_max_freq
- echo $min_freq > $cpu/scaling_min_freq
- done
- }
-
- set_gpu_min_freq() { # Write the GPU minimum frequency to the i915 debugfs file.
- GPU_MIN_FREQ=450 # not local: set_gpu_max_freq reads this value as its lower bound
- echo $GPU_MIN_FREQ > /sys/kernel/debug/dri/0/i915_min_freq
- }
-
- # Write the GPU maximum frequency $1 to the i915 debugfs file, using
- # GPU_MIN_FREQ instead whenever $1 is below it. GPU_MIN_FREQ is assigned by
- # set_gpu_min_freq.
- set_gpu_max_freq() {
-   if [ $GPU_MIN_FREQ -gt $1 ]; then
-     gpu_max_freq=$GPU_MIN_FREQ
-   else
-     gpu_max_freq=$1
-   fi
-   echo $gpu_max_freq > /sys/kernel/debug/dri/0/i915_max_freq
- }
-
- # Write the value $1 to MSR 0x19a on CPUs 0, 1, 2 and 3 using iotools.
- set_duty_cycle() {
-   for cpu_index in 0 1 2 3; do
-     iotools wrmsr $cpu_index 0x19a $1
-   done
- }
-
- set_pkg_power_limit() { # Write the value $1 to MSR 0x610 on CPU 0 using iotools.
- pwr_limit=$1
- iotools wrmsr 0 0x610 $pwr_limit
- }
-
- log_message() { # Send all arguments, joined into one string, to logger under the tag "temp_metrics".
- logger -t temp_metrics "$*"
- }
-
- TEMP_THRESHOLD_1=38 # thermal_loop selects zone N when the skin temperature is above TEMP_THRESHOLD_N (highest matching zone wins)
- TEMP_THRESHOLD_1_WM=40 # watermark of the zone: above it the step goes up by one, below it down by one, equal keeps the step
- TEMP_THRESHOLD_2=45
- TEMP_THRESHOLD_2_WM=47
- TEMP_THRESHOLD_3=50
- TEMP_THRESHOLD_3_WM=50
-
- TEMP_THRESHOLD_0_MIN_STEP=0 # zone 0 applies when no threshold is exceeded; MIN/MAX_STEP bound the all_steps index allowed in each zone
- TEMP_THRESHOLD_0_MAX_STEP=0
- TEMP_THRESHOLD_1_MIN_STEP=1
- TEMP_THRESHOLD_1_MAX_STEP=5
- TEMP_THRESHOLD_2_MIN_STEP=6
- TEMP_THRESHOLD_2_MAX_STEP=9
- TEMP_THRESHOLD_3_MIN_STEP=10
- TEMP_THRESHOLD_3_MAX_STEP=13
-
- current_step=1 # step whose settings were applied last; no settings have been written yet when the script starts
- new_step=0 # candidate step computed by thermal_loop; the value persists between calls
-
- thermal_loop() { # Move the throttling step by at most one towards what skin temperature $1 (degrees C) calls for, and apply the new step's settings when the step changes.
- # Hack to reset turbo activation threshold since BIOS can change it
- # underneath us.
- iotools wrmsr 0 0x64c 0x12
-
- skin_temp=$1
- if [ $skin_temp -gt $TEMP_THRESHOLD_3 ]; then # pick the zone: its watermark and its allowed step range
- temp_watermark=$TEMP_THRESHOLD_3_WM
- min_step=$TEMP_THRESHOLD_3_MIN_STEP
- max_step=$TEMP_THRESHOLD_3_MAX_STEP
- elif [ $skin_temp -gt $TEMP_THRESHOLD_2 ]; then
- temp_watermark=$TEMP_THRESHOLD_2_WM
- min_step=$TEMP_THRESHOLD_2_MIN_STEP
- max_step=$TEMP_THRESHOLD_2_MAX_STEP
- elif [ $skin_temp -gt $TEMP_THRESHOLD_1 ]; then
- temp_watermark=$TEMP_THRESHOLD_1_WM
- min_step=$TEMP_THRESHOLD_1_MIN_STEP
- max_step=$TEMP_THRESHOLD_1_MAX_STEP
- else
- temp_watermark=0
- min_step=$TEMP_THRESHOLD_0_MIN_STEP
- max_step=$TEMP_THRESHOLD_0_MAX_STEP
- fi
-
- if [ $skin_temp -gt $temp_watermark ]; then
- if [ $current_step -ne $max_step ]; then # when this inner test fails new_step keeps the value left by the previous call
- new_step=$(($current_step + 1))
- fi
- elif [ $skin_temp -lt $temp_watermark ]; then
- if [ $current_step -gt $min_step ]; then
- new_step=$(($current_step - 1))
- fi
- else
- new_step=$current_step
- fi
-
- if [ $new_step -gt $max_step ]; then # clamp into the zone's range; this also jumps the step when the zone itself changed
- new_step=$max_step
- elif [ $new_step -lt $min_step ]; then
- new_step=$min_step
- fi
-
- if [ $new_step -eq $current_step ]; then # nothing to apply
- return
- fi
-
- current_step=$new_step
- step=$(get_step $new_step)
-
- log_message "Throttling (temps: $sensor_temperatures):" $step
-
- cpu_max_freq=$(get_field $CPU_MAX_FREQ_FIELD "$step")
- cpu_min_freq=$(get_field $CPU_MIN_FREQ_FIELD "$step")
- gpu_max_freq=$(get_field $GPU_MAX_FREQ_FIELD "$step")
- cpu_duty_cycle=$(get_field $CPU_DUTY_CYCLE_FIELD "$step")
- pkg_power_limit=$(get_field $PKG_POWER_LIMIT_FIELD "$step")
-
- set_cpu_freq $cpu_max_freq $cpu_min_freq
- set_gpu_max_freq $gpu_max_freq
- set_duty_cycle $cpu_duty_cycle
- set_pkg_power_limit $pkg_power_limit
- }
-
- get_fan_rpm() { # Print the digits found in the output of "ectool pwmgetfanrpm".
- echo $(ectool pwmgetfanrpm | sed 's/[^0-9]//g')
- }
-
- set_fan_rpm() { # Pass $1 to "ectool pwmsetfanrpm".
- ectool pwmsetfanrpm $1
- }
-
- # Put all six temp_lowN hold thresholds back to 105, a value fan_loop's
- # "skin temperature greater than temp_lowN" tests are not expected to reach.
- reset_fan_thresholds() {
-   temp_low1=105; temp_low2=105; temp_low3=105
-   temp_low4=105; temp_low5=105; temp_low6=105
- }
-
- last_rpm=10 # fan speed requested last; 10 equals none of the speeds fan_loop can choose
- temp_low1=105 # temp_lowN: hold threshold of fan level N, lowered by fan_loop only while that level is selected
- temp_low2=105
- temp_low3=105
- temp_low4=105
- temp_low5=105
- temp_low6=105
-
- # Choose a fan speed for skin temperature $1 (degrees C) and send it to the
- # EC unless that speed is already in effect.
- #
- # Every level has a fixed entry threshold and a temp_lowN hold threshold.
- # The hold threshold is lowered only while its level is selected, so the
- # level is kept until the temperature drops to that lower value.
- fan_loop() {
-   skin_temp=$1
-
-   # Highest level whose entry or hold threshold is exceeded; 0 means off.
-   if [ $skin_temp -gt 48 ] || [ $skin_temp -gt $temp_low1 ]; then
-     level=1
-   elif [ $skin_temp -gt 44 ] || [ $skin_temp -gt $temp_low2 ]; then
-     level=2
-   elif [ $skin_temp -gt 42 ] || [ $skin_temp -gt $temp_low3 ]; then
-     level=3
-   elif [ $skin_temp -gt 40 ] || [ $skin_temp -gt $temp_low4 ]; then
-     level=4
-   elif [ $skin_temp -gt 38 ] || [ $skin_temp -gt $temp_low5 ]; then
-     level=5
-   elif [ $skin_temp -gt 33 ] || [ $skin_temp -gt $temp_low6 ]; then
-     level=6
-   else
-     level=0
-   fi
-
-   # Clear every hold threshold, then lower only the selected level's one.
-   reset_fan_thresholds
-   case $level in
-     1) rpm=9300; temp_low1=46 ;;
-     2) rpm=8000; temp_low2=43 ;;
-     3) rpm=7000; temp_low3=41 ;;
-     4) rpm=5500; temp_low4=39 ;;
-     5) rpm=4000; temp_low5=34 ;;
-     6) rpm=3000; temp_low6=30 ;;
-     *) rpm=0 ;;
-   esac
-
-   # During S0->S3->S0 transitions, the EC sets the fan RPM to 0. This script
-   # isn't aware of such transitions. Read the current fan RPM again to see
-   # if it got set to 0. Note that comparing the current fan RPM against last
-   # requested RPM won't suffice since the actual fan RPM may not be exactly
-   # what was requested.
-   cur_rpm=$(get_fan_rpm)
-   if { [ $cur_rpm -ne 0 ] && [ $last_rpm -eq $rpm ]; } ||
-      { [ $cur_rpm -eq 0 ] && [ $rpm -eq 0 ]; }; then
-     # The fan is spinning at the speed already requested, or it is stopped
-     # and should stay stopped: nothing to send.
-     last_rpm=$rpm
-     return
-   fi
-
-   log_message "Setting fan RPM (temps: $sensor_temperatures): $last_rpm -> $rpm"
-
-   last_rpm=$rpm
-   set_fan_rpm $rpm
- }
-
- # Thermal zone 1 is for operating systems where a userland thermal loop
- # doesn't exist. Disable it.
- if [ -e /sys/class/thermal/thermal_zone1/mode ]; then
- echo -n 'disabled' > /sys/class/thermal/thermal_zone1/mode
- fi
-
- # Enable the fan in case no other code has enabled it.
- ectool fanduty 0
-
- # Get list of sensors to monitor (space-separated EC sensor numbers).
- sensor_list=$(get_sensor_list)
-
- # Set sensor calibration data.
- set_calibration_data
-
- # Set minimum GPU frequency. This also defines GPU_MIN_FREQ, which set_gpu_max_freq reads.
- set_gpu_min_freq
-
- loop_count=0 # passes of the main loop since the last metrics report
- ec_fan_loop=0 # 1 while fan control has been handed to the EC with "ectool autofanctrl"
-
- # Use a linear histogram with 1 C buckets starting at 0. The value never
- # changes, so it is assigned once here rather than once per sensor line.
- N_SLOTS=180
-
- # Main loop. Every 10 seconds: read the skin temperature sensors and run the
- # fan and thermal loops. Every third pass: report each EC temperature to UMA.
- while true; do
-   sleep 10
-   loop_count=$((loop_count + 1))
-
-   # Read the max skin temperature. $sensor_list is left unquoted on purpose
-   # so that each sensor number becomes its own argument.
-   get_max_skin_temp $sensor_list
-
-   if [ "$max_skin_temp" -eq 0 ]; then
-     # No monitored sensor produced a reading above 0. Hand fan control to
-     # the EC, once, until valid readings return.
-     if [ "$ec_fan_loop" -eq 0 ]; then
-       log_message "Invalid max skin temp. Switching to EC fan loop."
-       ectool autofanctrl
-       ec_fan_loop=1
-       # 10 matches no speed fan_loop can choose, so the next valid reading
-       # always re-sends a fan speed.
-       last_rpm=10
-     fi
-   else
-     # Run the fan loop.
-     fan_loop "$max_skin_temp"
-     ec_fan_loop=0
-
-     # Run the thermal loop.
-     thermal_loop "$max_skin_temp"
-   fi
-
-   # Report the metrics once every 30 seconds.
-   if [ "$loop_count" -lt 3 ]; then
-     continue
-   fi
-   loop_count=0
-
-   # Each line is split into the text before the first ":" (used as the
-   # sensor index) and the last space-separated word (used as the reading in
-   # Kelvin). -r keeps read from treating backslashes as escape characters.
-   ectool temps all | while read -r line; do
-     index=$(printf "%02d" "${line%%:*}")
-     tempk="${line##* }"
-     tempc=$(($tempk - $TEMP_OFFSET))
-     # ignore values below freezing
-     if [ "$tempc" -lt 0 ]; then
-       tempc=0
-     fi
-     metrics_client -e "Platform.Temperature.Sensor$index" "$tempc" "$N_SLOTS"
-   done
- done
-end script