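/*
 * A simple fan daemon for OpenPOWER systems
 *
 * The remaining lines of this comment are repository web-page text that was
 * captured together with the source; they are not part of the program.
 *
 * You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 *
 * 440 lines
 * 14 KiB
 *
 * 3 years ago
 */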
/**
* Copyright 2019 Shawn Anastasio
*
* This file is part of op-fan-daemon.
*
* op-fan-daemon is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* op-fan-daemon is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with op-fan-daemon. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* A simple fan daemon for OpenPOWER systems
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>
#include <inttypes.h>
#include <assert.h>
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <syslog.h>
#include <sys/select.h>
#include <sys/signalfd.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#define DECLARE_CURVES 1
#include "fan-daemon.h"
#include "control.h"
#include "curve.h"
#include "util.h"
#define REASONABLE_PATH_LEN 1024
// sysfs directories handed to open_sensor_group() for CPU0 and CPU1.
// open_sensor_group() documents its argument as the OCC sbefifo device path.
const char cpu0_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:06/sbefifo1-dev0";
const char cpu1_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:0a/fsi1/slave@01:00/01:01:00:06/sbefifo2-dev0";
// Globally registered PWM providers
// Slots are filled by install_pwm_provider(); unused slots stay NULL.
struct pwm_ops *pwm_providers[NUM_PWM_PROVIDERS] = { 0 };
// Provider selected by main(): the first registered entry whose probe()
// returns non-zero. NULL until that selection has happened.
struct pwm_ops *pwm_provider = NULL;
/**
 * Register a PWM provider in the first free slot of the global provider table.
 *
 * If every slot is already taken, a fatal message is written to stderr and
 * the process exits with EXIT_FAILURE.
 *
 * @param ops provider operations to register
 */
void install_pwm_provider(struct pwm_ops *ops) {
    size_t slot = 0;

    // Advance to the first unused entry (or one past the end if there is none)
    while (slot < NUM_PWM_PROVIDERS && pwm_providers[slot] != NULL)
        slot++;

    if (slot == NUM_PWM_PROVIDERS) {
        fprintf(stderr, "FATAL: Unable to install pwm provider: not enough space.\n");
        exit(EXIT_FAILURE);
    }

    pwm_providers[slot] = ops;
}
/**
 * Initialize a sensor group from a given path.
 *
 * Looks in `path` for the first entry named "occ-hwmon.*", then looks in that
 * entry's "hwmon" subdirectory for the first entry named "hwmon*". On success
 * that final directory is left open in group_out->dir and its full path is
 * stored in group_out->dir_path. On failure group_out is left untouched.
 *
 * A candidate whose full path does not fit the local buffers (or
 * group_out->dir_path) is treated as "not found" rather than being opened
 * under a truncated name.
 *
 * @param path path to OCC sbefifo device in /sys
 * @param[out] group_out sensor_group struct to initialize on success
 * @return success?
 */
static bool open_sensor_group(const char *path, struct sensor_group *group_out) {
    static const char occ_prefix[] = "occ-hwmon.";
    static const char hwmon_prefix[] = "hwmon";

    bool ret = false;
    char occ_path[REASONABLE_PATH_LEN];
    char hwmon_path[REASONABLE_PATH_LEN];
    struct dirent *child;
    DIR *occ_hwmon = NULL;
    DIR *hwmonN = NULL;
    int len = 0;

    DIR *base = opendir(path);
    if (!base) {
        syslog(LOG_ERR, "Couldn't open provided path!\n");
        goto out;
    }

    // Walk fs to get to occ-hwmon.*/hwmon directory
    while ((child = readdir(base))) {
        // Match the complete prefix, including the trailing '.'; this also
        // rejects ".", ".." and other hidden entries.
        if (strncmp(occ_prefix, child->d_name, sizeof(occ_prefix) - 1) != 0)
            continue;

        // Build the path from the full entry name so long names are never cut
        len = snprintf(occ_path, sizeof(occ_path), "%s/%s/hwmon", path, child->d_name);
        if (len >= 0 && (size_t)len < sizeof(occ_path))
            occ_hwmon = opendir(occ_path);
        break; // Only the first match is considered
    }
    if (!occ_hwmon) {
        syslog(LOG_ERR, "Couldn't find occ_hwmon!\n");
        goto out_base;
    }

    // Walk fs to get to hwmonN directory
    while ((child = readdir(occ_hwmon))) {
        if (strncmp(hwmon_prefix, child->d_name, sizeof(hwmon_prefix) - 1) != 0)
            continue;

        len = snprintf(hwmon_path, sizeof(hwmon_path), "%s/%s", occ_path, child->d_name);
        // Only open the directory if its path can be stored intact for later use
        if (len >= 0 && (size_t)len < sizeof(hwmon_path) &&
            (size_t)len < sizeof(group_out->dir_path))
            hwmonN = opendir(hwmon_path);
        break; // Only the first match is considered
    }
    if (!hwmonN) {
        syslog(LOG_ERR, "Couldn't find hwmonN!\n");
        goto out_occ_hwmon;
    }

    // Successfully found and opened the appropriate hwmon directory, return it.
    group_out->dir = hwmonN;
    memcpy(group_out->dir_path, hwmon_path, (size_t)len + 1); // includes the NUL
    ret = true;

    // The intermediate directories are closed on success as well; only
    // hwmonN stays open for the caller.
out_occ_hwmon:
    closedir(occ_hwmon);
out_base:
    closedir(base);
out:
    return ret;
}
/**
 * Populate a given list of sensors with all sensors in a given group that
 * match a given FRU.
 *
 * For every "*_fru_type" node in the group's directory the FRU value is read
 * and compared with desired_fru. For each match, the corresponding
 * "tempN_input" file is opened read-only and its file descriptor is appended
 * to the list. Nodes that cannot be read or parsed are logged and skipped;
 * they do not make the function fail.
 *
 * @param list list to populate
 * @param group sensors group to search in
 * @param desired_fru FRU to filter by
 * @return success? (currently always true)
 */
static bool populate_sensor_list(struct vec_int *list, struct sensor_group *group, uint8_t desired_fru) {
    char pathbuf[REASONABLE_PATH_LEN + 256 /* d_name field is 256 bytes wide */];
    struct dirent *child;

    // Walk sensor directory for *_fru_type nodes and read them
    rewinddir(group->dir);
    while ((child = readdir(group->dir))) {
        if (child->d_name[0] == '.')
            continue; // Skip hidden, ".", ".."
        if (!strstr(child->d_name, "_fru_type"))
            continue; // Not what we're looking for

        // Read FRU
        snprintf(pathbuf, sizeof(pathbuf), "%s/%s", group->dir_path, child->d_name);
        int type_fd = open(pathbuf, O_RDONLY);
        if (type_fd < 0)
            goto skip_sensor;

        char fru_str[2 + 1 /* strlen("00") */];
        ssize_t n = read(type_fd, fru_str, sizeof(fru_str) - 1);
        int read_errno = errno; // close() below must not hide read()'s error
        close(type_fd);
        if (n < 0) {
            errno = read_errno;
            goto skip_sensor;
        }
        fru_str[n] = '\0';

        // Convert FRU to number. An empty read or non-numeric content must
        // not be mistaken for FRU 0, so require at least one digit.
        char *endptr;
        long fru = strtol(fru_str, &endptr, 10);
        if (endptr == fru_str || (*endptr != '\n' && *endptr != '\0')) {
            errno = EINVAL; // Give the %m in the log below a meaningful value
            goto skip_sensor;
        }

        // See if FRU matches
        if (fru != desired_fru)
            continue;

        // Extract the sensor index; names that are not tempN_fru_type are skipped
        unsigned int num;
        if (sscanf(child->d_name, "temp%u_fru_type", &num) != 1) {
            errno = EINVAL;
            goto skip_sensor;
        }

        // Open fd to this sensor's input file and append to list
        snprintf(pathbuf, sizeof(pathbuf), "%s/temp%u_input", group->dir_path, num);
        int sensor_fd = open(pathbuf, O_RDONLY);
        if (sensor_fd < 0)
            goto skip_sensor;

        // NOTE(review): vec_int_push_back's return value is not visible here;
        // confirm whether it can fail and, if so, close sensor_fd on failure.
        vec_int_push_back(list, sensor_fd);
        continue;

skip_sensor:
        syslog(LOG_INFO, "Failed to read sensor %s: %m.\n", child->d_name);
    }
    return true;
}
/**
* Initialize sensors and platform PWM controller
*/
struct controller_state *controller_init(void) {
struct controller_state *state = calloc(1, sizeof(struct controller_state));
if (!state) {
syslog(LOG_ERR, "Unable to allocate program state: %m.\n");
return NULL;
}
if (!vec_int_init(&state->zones[ZONE_CPU0].sensors, 50, close_destructor) ||
!vec_int_init(&state->zones[ZONE_CPU1].sensors, 50, close_destructor) ||
!vec_int_init(&state->zones[ZONE_CHASSIS].sensors, 50, close_destructor)) {
syslog(LOG_ERR, "Unable to allocate sensor lists: %m.\n");
goto fail;
}
// Select fan curves to use for each zone
state->zones[ZONE_CPU0].curve = zone_cpu0_curve;
state->zones[ZONE_CPU0].curve_size = ARRAY_SIZE(zone_cpu0_curve);
state->zones[ZONE_CPU0].curve_pos = ARRAY_SIZE(zone_cpu0_curve) - 1;
state->zones[ZONE_CPU0].next_curve_pos = 0;
state->zones[ZONE_CPU0].next_curve_count = 0;
3 years ago
state->zones[ZONE_CPU1].curve = zone_cpu1_curve;
state->zones[ZONE_CPU1].curve_size = ARRAY_SIZE(zone_cpu1_curve);
state->zones[ZONE_CPU1].curve_pos = ARRAY_SIZE(zone_cpu1_curve) - 1;
state->zones[ZONE_CPU1].next_curve_pos = 0;
state->zones[ZONE_CPU1].next_curve_count = 0;
3 years ago
state->zones[ZONE_CHASSIS].curve = zone_chassis_curve;
state->zones[ZONE_CHASSIS].curve_size = ARRAY_SIZE(zone_chassis_curve);
state->zones[ZONE_CHASSIS].curve_pos = ARRAY_SIZE(zone_chassis_curve) - 1;
state->zones[ZONE_CHASSIS].next_curve_pos = 0;
state->zones[ZONE_CHASSIS].next_curve_count = 0;
3 years ago
// Open sensor groups
if (!open_sensor_group(cpu0_sensors_path, &state->occ0)) {
syslog(LOG_ERR, "Unable to open OCC sensors for CPU0: %m.\n");
goto fail;
}
if (!open_sensor_group(cpu1_sensors_path, &state->occ1))
syslog(LOG_INFO, "Unable to open OCC sensors for CPU1, assuming single socket system (%m).\n");
else
state->flags |= STATE_FLAG_CPU1_PRESENT;
// Populate zone sensor lists
if (!populate_sensor_list(&state->zones[ZONE_CPU0].sensors, &state->occ0, FRU_TYPE_CORE)) {
syslog(LOG_ERR, "Unable to populate sensor list for CPU0: %m.\n");
goto fail;
}
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_DIMM)) {
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC0): %m.\n");
goto fail;
}
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_VRM_VDD)) {
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC0): %m.\n");
goto fail;
}
// If OCC1 is present, populate its sensors too
if (state->flags & STATE_FLAG_CPU1_PRESENT) {
if (!populate_sensor_list(&state->zones[ZONE_CPU1].sensors, &state->occ1, FRU_TYPE_CORE)) {
syslog(LOG_ERR, "Unable to populate sensor list for CPU1: %m.\n");
goto fail;
}
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_DIMM)) {
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC1): %m.\n");
goto fail;
}
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_VRM_VDD)) {
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC1): %m.\n");
goto fail;
}
}
return state;
fail:
// Cleanup any members that were initialized
if (state->occ0.dir)
closedir(state->occ0.dir);
if (state->occ1.dir)
closedir(state->occ1.dir);
for (size_t i=0; i<NUM_ZONES; i++) {
if (state->zones[i].sensors.data)
vec_int_destroy(&state->zones[i].sensors);
}
return NULL;
}
/**
 * Release everything owned by a controller state created by controller_init().
 *
 * Closes the OCC sensor directories, destroys the zone sensor lists and frees
 * the state itself. Passing NULL is a no-op.
 *
 * @param state state to destroy (may be NULL)
 */
static void controller_destroy(struct controller_state *state) {
    if (!state)
        return;

    closedir(state->occ0.dir);
    // occ1 is only opened when the second socket was detected
    if (state->flags & STATE_FLAG_CPU1_PRESENT)
        closedir(state->occ1.dir);

    // controller_init() sets up all three sensor lists regardless of whether
    // CPU1 is present, so all three must be destroyed here.
    vec_int_destroy(&state->zones[ZONE_CPU0].sensors);
    vec_int_destroy(&state->zones[ZONE_CPU1].sensors);
    vec_int_destroy(&state->zones[ZONE_CHASSIS].sensors);

    free(state);
}
/**
 * Wait for a mode-change signal or for the poll delay to elapse.
 *
 * Blocks in select(2) on the signalfd for at most POLL_DELAY_US microseconds.
 * SIGUSR1 requests that fan control be disabled and SIGUSR2 that it be
 * enabled. Any other signal is logged and ignored.
 *
 * @param sigfd signalfd to poll
 * @param cur_mode current mode (true = fan control enabled)
 * @return the mode to use next: false after SIGUSR1 or on a polling error,
 *         true after SIGUSR2, otherwise cur_mode unchanged
 */
bool get_next_mode(int sigfd, bool cur_mode) {
    // select(2) on the signalfd or timeout
    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(sigfd, &fds);

    struct timeval timeout = { .tv_usec = POLL_DELAY_US };
    if (select(sigfd + 1, &fds, NULL, NULL, &timeout) < 0) {
        // An interrupted wait is not a failure; keep the current mode and
        // let the caller poll again on its next iteration.
        if (errno == EINTR)
            return cur_mode;
        goto fail;
    }

    if (FD_ISSET(sigfd, &fds)) {
        struct signalfd_siginfo siginfo;
        if (read(sigfd, &siginfo, sizeof(siginfo)) < 0)
            goto fail;

        if (siginfo.ssi_signo == SIGUSR1)
            return false; // Disable fan control
        else if (siginfo.ssi_signo == SIGUSR2)
            return true; // Enable fan control
        else
            // ssi_signo is a uint32_t, so print it with the matching specifier
            syslog(LOG_WARNING, "Unknown signal received (%" PRIu32 "). Ignoring.\n", siginfo.ssi_signo);
    }

    // Timeout reached or unknown signal, return current mode
    return cur_mode;

fail:
    syslog(LOG_ERR, "Error encountered while polling: %m. Disabling fan control.\n");
    return false;
}
int main(int argc, char **argv) {
openlog("fan-daemon", LOG_NDELAY, LOG_USER);
// Find PWM provider
for (size_t i=0; i<NUM_PWM_PROVIDERS; i++) {
if (pwm_providers[i] && pwm_providers[i]->probe()) {
pwm_provider = pwm_providers[i];
break;
}
}
if (!pwm_provider) {
syslog(LOG_ERR, "Failed to autodetect platform! Exiting.\n");
return EXIT_FAILURE;
}
//struct controller_state *state = controller_init();
//print_zones(state);
/**
* Program state is controlled via signals.
* SIGUSR1 - Fan control disabled, max out all zones. Used during IPL.
* SIGUSR2 - Fan control enabled, read OCC sensors and follow curve.
*
* Create a signalfd to receive these.
*/
sigset_t ss;
sigemptyset(&ss);
sigaddset(&ss, SIGUSR1);
sigaddset(&ss, SIGUSR2);
int sigfd = signalfd(-1, &ss, 0);
if (sigfd < 0) {
syslog(LOG_ERR, "Failed to create signalfd: %m. Exiting.\n");
return EXIT_FAILURE;
}
// Now that we can receive them via a signalfd, block them
if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0) {
syslog(LOG_ERR, "Failed to block signals: %m. Exiting.\n");
return EXIT_FAILURE;
}
bool enable_control = false;
bool fault = false;
3 years ago
struct controller_state *state = NULL;
for(;;) {
// Get next mode
bool next = get_next_mode(sigfd, enable_control) && !fault;
3 years ago
// Check if mode changed and perform switch
if (next != enable_control) {
enable_control = next;
if (next) {
// Disable->Enable, re-initalize controller state
dbg_printf("Enabling fan control!\n");
if (!(state = controller_init())) {
syslog(LOG_ERR, "Failed to allocate state! Disabling fan control.\n");
enable_control = false;
}
} else {
// Enable->Disable, destroy controller state
dbg_printf("Disabling fan control!\n");
controller_destroy(state);
}
}
// Clear any previous faults
fault = false;
3 years ago
// Control fans
if (enable_control) {
// Run control algorithm on all zones
if (!update_zone(state, ZONE_CPU0))
fault = true;
3 years ago
if (state->flags & STATE_FLAG_CPU1_PRESENT)
if (!update_zone(state, ZONE_CPU1))
fault = true;
3 years ago
if (!update_zone(state, ZONE_CHASSIS))
fault = true;
3 years ago
} else {
// Fan control disabled, max out all fans
pwm_provider->set_zone_speed(ZONE_CPU0, 255);
pwm_provider->set_zone_speed(ZONE_CPU1, 255);
pwm_provider->set_zone_speed(ZONE_CHASSIS, 255);
}
}
}