You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
439 lines
14 KiB
439 lines
14 KiB
/** |
|
* Copyright 2019 Shawn Anastasio |
|
* |
|
* This file is part of op-fan-daemon. |
|
* |
|
* op-fan-daemon is free software: you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License as published by |
|
* the Free Software Foundation, either version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* op-fan-daemon is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with op-fan-daemon. If not, see <https://www.gnu.org/licenses/>. |
|
*/ |
|
|
|
/** |
|
* A simple fan daemon for OpenPOWER systems |
|
*/ |
|
|
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <stdbool.h> |
|
#include <stdint.h> |
|
#include <errno.h> |
|
#include <inttypes.h> |
|
#include <assert.h> |
|
|
|
#include <dirent.h> |
|
#include <fcntl.h> |
|
#include <signal.h> |
|
#include <syslog.h> |
|
#include <sys/select.h> |
|
#include <sys/signalfd.h> |
|
#include <sys/stat.h> |
|
#include <sys/time.h> |
|
#include <sys/types.h> |
|
#include <unistd.h> |
|
|
|
#define DECLARE_CURVES 1 |
|
#include "fan-daemon.h" |
|
#include "control.h" |
|
#include "curve.h" |
|
#include "util.h" |
|
|
|
#define REASONABLE_PATH_LEN 1024 |
|
|
|
const char cpu0_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:06/sbefifo1-dev0"; |
|
const char cpu1_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:0a/fsi1/slave@01:00/01:01:00:06/sbefifo2-dev0"; |
|
|
|
// Globally registered PWM providers |
|
struct pwm_ops *pwm_providers[NUM_PWM_PROVIDERS] = { 0 }; |
|
struct pwm_ops *pwm_provider = NULL; |
|
|
|
void install_pwm_provider(struct pwm_ops *ops) { |
|
for (size_t i=0; i<NUM_PWM_PROVIDERS; i++) { |
|
if (!pwm_providers[i]) { |
|
pwm_providers[i] = ops; |
|
return; |
|
} |
|
} |
|
|
|
fprintf(stderr, "FATAL: Unable to install pwm provider: not enough space.\n"); |
|
exit(EXIT_FAILURE); |
|
} |
|
|
|
/** |
|
* Initialize a sensor group from a given path. |
|
* |
|
* @param path path to OCC sbefifo device in /sys |
|
* @param[out] group_out sensor_group struct to initialize on success |
|
* @return success? |
|
*/ |
|
static bool open_sensor_group(const char *path, struct sensor_group *group_out) { |
|
bool ret = false; |
|
char pathbuf[REASONABLE_PATH_LEN]; |
|
char occ_hwmon_name[12 + 1 /* strlen("occ-hwmon.XX") */]; |
|
struct dirent *child; |
|
|
|
DIR *base = opendir(path); |
|
if (!base) { |
|
syslog(LOG_ERR, "Couldn't open provided path!\n"); |
|
goto out; |
|
} |
|
|
|
// Walk fs to get to occ-hwmon.*/hwmon directory |
|
DIR *occ_hwmon = NULL; |
|
while ((child = readdir(base))) { |
|
if (child->d_name[0] == '.') |
|
continue; // Skip hidden, ".", ".." |
|
|
|
if (!strncmp("occ-hwmon.", child->d_name, 9)) { |
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s/hwmon", path, child->d_name); |
|
occ_hwmon = opendir(pathbuf); |
|
|
|
strncpy(occ_hwmon_name, child->d_name, sizeof(occ_hwmon_name) - 1); |
|
occ_hwmon_name[sizeof(occ_hwmon_name) - 1] = '\0'; |
|
break; |
|
} |
|
} |
|
|
|
if (!occ_hwmon) { |
|
syslog(LOG_ERR, "Couldn't find occ_hwmon!\n"); |
|
goto out_base; |
|
} |
|
|
|
// Walk fs to get to hwmonN directory |
|
DIR *hwmonN = NULL; |
|
while ((child = readdir(occ_hwmon))) { |
|
if (child->d_name[0] == '.') |
|
continue; // Skip hidden, ".", ".." |
|
|
|
if (!strncmp("hwmon", child->d_name, 5)) { |
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s/hwmon/%s", path, occ_hwmon_name, child->d_name); |
|
hwmonN = opendir(pathbuf); |
|
break; |
|
} |
|
} |
|
|
|
if (!hwmonN) { |
|
syslog(LOG_ERR, "Couldn't find hwmonN!\n"); |
|
goto out_occ_hwmon; |
|
} |
|
|
|
// Successfully found and opened the appropriate hwmon directory, return it. |
|
group_out->dir = hwmonN; |
|
strncpy(group_out->dir_path, pathbuf, sizeof(group_out->dir_path) - 1); |
|
group_out->dir_path[sizeof(group_out->dir_path) - 1] = '\0'; |
|
ret = true; |
|
|
|
out_occ_hwmon: |
|
closedir(occ_hwmon); |
|
out_base: |
|
closedir(base); |
|
out: |
|
return ret; |
|
} |
|
|
|
/** |
|
* Populate a given list of sensors with all sensors in a given group that |
|
* match a given FRU. |
|
* |
|
* @param list list to populate |
|
* @param group sensors group to search in |
|
* @param desired_fru FRU to filter by |
|
* @return success? |
|
*/ |
|
static bool populate_sensor_list(struct vec_int *list, struct sensor_group *group, uint8_t desired_fru) { |
|
char pathbuf[REASONABLE_PATH_LEN + 256 /* d_name field is 256 bytes wide */]; |
|
struct dirent *child; |
|
|
|
// Walk sensor directory for *_fru_type nodes and read them |
|
rewinddir(group->dir); |
|
while ((child = readdir(group->dir))) { |
|
if (child->d_name[0] == '.') |
|
continue; // Skip hidden, ".", ".." |
|
|
|
if (!strstr(child->d_name, "_fru_type")) |
|
continue; // Not what we're looking for |
|
|
|
// Read FRU |
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", group->dir_path, child->d_name); |
|
int type_fd = open(pathbuf, O_RDONLY); |
|
if (type_fd < 0) |
|
goto skip_sensor; |
|
|
|
char fru_str[2 + 1 /* strlen("00") */]; |
|
ssize_t n; |
|
if ((n = read(type_fd, fru_str, sizeof(fru_str) - 1)) < 0) { |
|
close(type_fd); |
|
goto skip_sensor; |
|
} |
|
fru_str[n] = '\0'; |
|
close(type_fd); |
|
|
|
// Convert FRU to number |
|
char *endptr; |
|
long fru = strtol(fru_str, &endptr, 10); |
|
if (*endptr != '\n' && *endptr != '\0') |
|
goto skip_sensor; |
|
|
|
// See if FRU matches |
|
if (fru != desired_fru) |
|
continue; |
|
|
|
// Open fd to this sensor's input file and append to list |
|
uint8_t num; |
|
sscanf(child->d_name, "temp%" SCNu8 "_fru_type", &num); |
|
|
|
snprintf(pathbuf, sizeof(pathbuf), "%s/temp%" PRIu8 "_input", group->dir_path, num); |
|
int sensor_fd = open(pathbuf, O_RDONLY); |
|
if (sensor_fd < 0) |
|
goto skip_sensor; |
|
|
|
vec_int_push_back(list, sensor_fd); |
|
|
|
continue; |
|
skip_sensor: |
|
syslog(LOG_INFO, "Failed to read sensor %s: %m.\n", child->d_name); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
/** |
|
* Initialize sensors and platform PWM controller |
|
*/ |
|
struct controller_state *controller_init(void) { |
|
struct controller_state *state = calloc(1, sizeof(struct controller_state)); |
|
if (!state) { |
|
syslog(LOG_ERR, "Unable to allocate program state: %m.\n"); |
|
return NULL; |
|
} |
|
|
|
if (!vec_int_init(&state->zones[ZONE_CPU0].sensors, 50, close_destructor) || |
|
!vec_int_init(&state->zones[ZONE_CPU1].sensors, 50, close_destructor) || |
|
!vec_int_init(&state->zones[ZONE_CHASSIS].sensors, 50, close_destructor)) { |
|
syslog(LOG_ERR, "Unable to allocate sensor lists: %m.\n"); |
|
goto fail; |
|
} |
|
|
|
// Select fan curves to use for each zone |
|
state->zones[ZONE_CPU0].curve = zone_cpu0_curve; |
|
state->zones[ZONE_CPU0].curve_size = ARRAY_SIZE(zone_cpu0_curve); |
|
state->zones[ZONE_CPU0].curve_pos = ARRAY_SIZE(zone_cpu0_curve) - 1; |
|
state->zones[ZONE_CPU0].next_curve_pos = 0; |
|
state->zones[ZONE_CPU0].next_curve_count = 0; |
|
|
|
state->zones[ZONE_CPU1].curve = zone_cpu1_curve; |
|
state->zones[ZONE_CPU1].curve_size = ARRAY_SIZE(zone_cpu1_curve); |
|
state->zones[ZONE_CPU1].curve_pos = ARRAY_SIZE(zone_cpu1_curve) - 1; |
|
state->zones[ZONE_CPU1].next_curve_pos = 0; |
|
state->zones[ZONE_CPU1].next_curve_count = 0; |
|
|
|
state->zones[ZONE_CHASSIS].curve = zone_chassis_curve; |
|
state->zones[ZONE_CHASSIS].curve_size = ARRAY_SIZE(zone_chassis_curve); |
|
state->zones[ZONE_CHASSIS].curve_pos = ARRAY_SIZE(zone_chassis_curve) - 1; |
|
state->zones[ZONE_CHASSIS].next_curve_pos = 0; |
|
state->zones[ZONE_CHASSIS].next_curve_count = 0; |
|
|
|
// Open sensor groups |
|
if (!open_sensor_group(cpu0_sensors_path, &state->occ0)) { |
|
syslog(LOG_ERR, "Unable to open OCC sensors for CPU0: %m.\n"); |
|
goto fail; |
|
} |
|
|
|
if (!open_sensor_group(cpu1_sensors_path, &state->occ1)) |
|
syslog(LOG_INFO, "Unable to open OCC sensors for CPU1, assuming single socket system (%m).\n"); |
|
else |
|
state->flags |= STATE_FLAG_CPU1_PRESENT; |
|
|
|
|
|
// Populate zone sensor lists |
|
if (!populate_sensor_list(&state->zones[ZONE_CPU0].sensors, &state->occ0, FRU_TYPE_CORE)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for CPU0: %m.\n"); |
|
goto fail; |
|
} |
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_DIMM)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC0): %m.\n"); |
|
goto fail; |
|
} |
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_VRM_VDD)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC0): %m.\n"); |
|
goto fail; |
|
} |
|
|
|
// If OCC1 is present, populate its sensors too |
|
if (state->flags & STATE_FLAG_CPU1_PRESENT) { |
|
if (!populate_sensor_list(&state->zones[ZONE_CPU1].sensors, &state->occ1, FRU_TYPE_CORE)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for CPU1: %m.\n"); |
|
goto fail; |
|
} |
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_DIMM)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC1): %m.\n"); |
|
goto fail; |
|
} |
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_VRM_VDD)) { |
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC1): %m.\n"); |
|
goto fail; |
|
} |
|
} |
|
|
|
return state; |
|
|
|
fail: |
|
// Cleanup any members that were initialized |
|
if (state->occ0.dir) |
|
closedir(state->occ0.dir); |
|
if (state->occ1.dir) |
|
closedir(state->occ1.dir); |
|
for (size_t i=0; i<NUM_ZONES; i++) { |
|
if (state->zones[i].sensors.data) |
|
vec_int_destroy(&state->zones[i].sensors); |
|
} |
|
|
|
return NULL; |
|
} |
|
|
|
static void controller_destroy(struct controller_state *state) { |
|
closedir(state->occ0.dir); |
|
|
|
vec_int_destroy(&state->zones[ZONE_CPU0].sensors); |
|
vec_int_destroy(&state->zones[ZONE_CHASSIS].sensors); |
|
|
|
if (state->flags & STATE_FLAG_CPU1_PRESENT) { |
|
closedir(state->occ1.dir); |
|
vec_int_destroy(&state->zones[ZONE_CPU1].sensors); |
|
} |
|
|
|
free(state); |
|
} |
|
|
|
bool get_next_mode(int sigfd, bool cur_mode) { |
|
// select(2) on the signalfd or timeout |
|
fd_set fds; |
|
FD_ZERO(&fds); |
|
FD_SET(sigfd, &fds); |
|
struct timeval timeout = { .tv_usec = POLL_DELAY_US }; |
|
if (select(sigfd + 1, &fds, NULL, NULL, &timeout) < 0) |
|
goto fail; |
|
|
|
if (FD_ISSET(sigfd, &fds)) { |
|
struct signalfd_siginfo siginfo; |
|
if (read(sigfd, &siginfo, sizeof(siginfo)) < 0) |
|
goto fail; |
|
|
|
if (siginfo.ssi_signo == SIGUSR1) |
|
return false; // Disable fan control |
|
else if (siginfo.ssi_signo == SIGUSR2) |
|
return true; // Enable fan control |
|
else |
|
syslog(LOG_WARNING, "Unknown signal received (%d). Ignoring.\n", siginfo.ssi_signo); |
|
} |
|
|
|
// Timeout reached or unknown signal, return current mode |
|
return cur_mode; |
|
|
|
fail: |
|
syslog(LOG_ERR, "Error encountered while polling: %m. Disabling fan control.\n"); |
|
return false; |
|
} |
|
|
|
int main(int argc, char **argv) { |
|
openlog("fan-daemon", LOG_NDELAY, LOG_USER); |
|
|
|
// Find PWM provider |
|
for (size_t i=0; i<NUM_PWM_PROVIDERS; i++) { |
|
if (pwm_providers[i] && pwm_providers[i]->probe()) { |
|
pwm_provider = pwm_providers[i]; |
|
break; |
|
} |
|
} |
|
|
|
if (!pwm_provider) { |
|
syslog(LOG_ERR, "Failed to autodetect platform! Exiting.\n"); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
//struct controller_state *state = controller_init(); |
|
//print_zones(state); |
|
|
|
/** |
|
* Program state is controlled via signals. |
|
* SIGUSR1 - Fan control disabled, max out all zones. Used during IPL. |
|
* SIGUSR2 - Fan control enabled, read OCC sensors and follow curve. |
|
* |
|
* Create a signalfd to receive these. |
|
*/ |
|
sigset_t ss; |
|
sigemptyset(&ss); |
|
sigaddset(&ss, SIGUSR1); |
|
sigaddset(&ss, SIGUSR2); |
|
int sigfd = signalfd(-1, &ss, 0); |
|
if (sigfd < 0) { |
|
syslog(LOG_ERR, "Failed to create signalfd: %m. Exiting.\n"); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
// Now that we can receive them via a signalfd, block them |
|
if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0) { |
|
syslog(LOG_ERR, "Failed to block signals: %m. Exiting.\n"); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
bool enable_control = false; |
|
bool fault = false; |
|
struct controller_state *state = NULL; |
|
for(;;) { |
|
// Get next mode |
|
bool next = get_next_mode(sigfd, enable_control) && !fault; |
|
|
|
// Check if mode changed and perform switch |
|
if (next != enable_control) { |
|
enable_control = next; |
|
if (next) { |
|
// Disable->Enable, re-initalize controller state |
|
dbg_printf("Enabling fan control!\n"); |
|
if (!(state = controller_init())) { |
|
syslog(LOG_ERR, "Failed to allocate state! Disabling fan control.\n"); |
|
enable_control = false; |
|
} |
|
} else { |
|
// Enable->Disable, destroy controller state |
|
dbg_printf("Disabling fan control!\n"); |
|
controller_destroy(state); |
|
} |
|
} |
|
|
|
// Clear any previous faults |
|
fault = false; |
|
|
|
// Control fans |
|
if (enable_control) { |
|
// Run control algorithm on all zones |
|
if (!update_zone(state, ZONE_CPU0)) |
|
fault = true; |
|
|
|
if (state->flags & STATE_FLAG_CPU1_PRESENT) |
|
if (!update_zone(state, ZONE_CPU1)) |
|
fault = true; |
|
|
|
if (!update_zone(state, ZONE_CHASSIS)) |
|
fault = true; |
|
} else { |
|
// Fan control disabled, max out all fans |
|
pwm_provider->set_zone_speed(ZONE_CPU0, 255); |
|
pwm_provider->set_zone_speed(ZONE_CPU1, 255); |
|
pwm_provider->set_zone_speed(ZONE_CHASSIS, 255); |
|
} |
|
} |
|
}
|
|
|