|
|
|
/**
|
|
|
|
* Copyright 2019 Shawn Anastasio
|
|
|
|
*
|
|
|
|
* This file is part of op-fan-daemon.
|
|
|
|
*
|
|
|
|
* op-fan-daemon is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* op-fan-daemon is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with op-fan-daemon. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A simple fan daemon for OpenPOWER systems
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <syslog.h>
|
|
|
|
#include <sys/select.h>
|
|
|
|
#include <sys/signalfd.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#define DECLARE_CURVES 1
|
|
|
|
#include "fan-daemon.h"
|
|
|
|
#include "control.h"
|
|
|
|
#include "curve.h"
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
// Size, in bytes, of the stack buffers used in this file to build sysfs paths
#define REASONABLE_PATH_LEN 1024
|
|
|
|
|
|
|
|
// sysfs directory passed to open_sensor_group() to locate CPU0's OCC sensors
const char cpu0_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:06/sbefifo1-dev0";
|
|
|
|
// sysfs directory passed to open_sensor_group() to locate CPU1's OCC sensors;
// controller_init() treats a failure to open it as a single socket system
const char cpu1_sensors_path[] = "/sys/devices/platform/gpio-fsi/fsi0/slave@00:00/00:00:00:0a/fsi1/slave@01:00/01:01:00:06/sbefifo2-dev0";
|
|
|
|
|
|
|
|
// Globally registered PWM providers
|
|
|
|
// Table filled by install_pwm_provider(); unused slots stay NULL
struct pwm_ops *pwm_providers[NUM_PWM_PROVIDERS] = { 0 };
|
|
|
|
// Provider in use; main() sets it to the first registered entry whose probe() succeeds
struct pwm_ops *pwm_provider = NULL;
|
|
|
|
|
|
|
|
void install_pwm_provider(struct pwm_ops *ops) {
|
|
|
|
for (size_t i=0; i<NUM_PWM_PROVIDERS; i++) {
|
|
|
|
if (!pwm_providers[i]) {
|
|
|
|
pwm_providers[i] = ops;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "FATAL: Unable to install pwm provider: not enough space.\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize a sensor group from a given path.
|
|
|
|
*
|
|
|
|
* @param path path to OCC sbefifo device in /sys
|
|
|
|
* @param[out] group_out sensor_group struct to initialize on success
|
|
|
|
* @return success?
|
|
|
|
*/
|
|
|
|
static bool open_sensor_group(const char *path, struct sensor_group *group_out) {
|
|
|
|
bool ret = false;
|
|
|
|
char pathbuf[REASONABLE_PATH_LEN];
|
|
|
|
char occ_hwmon_name[12 + 1 /* strlen("occ-hwmon.XX") */];
|
|
|
|
struct dirent *child;
|
|
|
|
|
|
|
|
DIR *base = opendir(path);
|
|
|
|
if (!base) {
|
|
|
|
syslog(LOG_ERR, "Couldn't open provided path!\n");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walk fs to get to occ-hwmon.*/hwmon directory
|
|
|
|
DIR *occ_hwmon = NULL;
|
|
|
|
while ((child = readdir(base))) {
|
|
|
|
if (child->d_name[0] == '.')
|
|
|
|
continue; // Skip hidden, ".", ".."
|
|
|
|
|
|
|
|
if (!strncmp("occ-hwmon.", child->d_name, 9)) {
|
|
|
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s/hwmon", path, child->d_name);
|
|
|
|
occ_hwmon = opendir(pathbuf);
|
|
|
|
|
|
|
|
strncpy(occ_hwmon_name, child->d_name, sizeof(occ_hwmon_name) - 1);
|
|
|
|
occ_hwmon_name[sizeof(occ_hwmon_name) - 1] = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!occ_hwmon) {
|
|
|
|
syslog(LOG_ERR, "Couldn't find occ_hwmon!\n");
|
|
|
|
goto out_base;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walk fs to get to hwmonN directory
|
|
|
|
DIR *hwmonN = NULL;
|
|
|
|
while ((child = readdir(occ_hwmon))) {
|
|
|
|
if (child->d_name[0] == '.')
|
|
|
|
continue; // Skip hidden, ".", ".."
|
|
|
|
|
|
|
|
if (!strncmp("hwmon", child->d_name, 5)) {
|
|
|
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s/hwmon/%s", path, occ_hwmon_name, child->d_name);
|
|
|
|
hwmonN = opendir(pathbuf);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!hwmonN) {
|
|
|
|
syslog(LOG_ERR, "Couldn't find hwmonN!\n");
|
|
|
|
goto out_occ_hwmon;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Successfully found and opened the appropriate hwmon directory, return it.
|
|
|
|
group_out->dir = hwmonN;
|
|
|
|
strncpy(group_out->dir_path, pathbuf, sizeof(group_out->dir_path) - 1);
|
|
|
|
group_out->dir_path[sizeof(group_out->dir_path) - 1] = '\0';
|
|
|
|
ret = true;
|
|
|
|
|
|
|
|
out_occ_hwmon:
|
|
|
|
closedir(occ_hwmon);
|
|
|
|
out_base:
|
|
|
|
closedir(base);
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Populate a given list of sensors with all sensors in a given group that
|
|
|
|
* match a given FRU.
|
|
|
|
*
|
|
|
|
* @param list list to populate
|
|
|
|
* @param group sensors group to search in
|
|
|
|
* @param desired_fru FRU to filter by
|
|
|
|
* @return success?
|
|
|
|
*/
|
|
|
|
static bool populate_sensor_list(struct vec_int *list, struct sensor_group *group, uint8_t desired_fru) {
|
|
|
|
char pathbuf[REASONABLE_PATH_LEN + 256 /* d_name field is 256 bytes wide */];
|
|
|
|
struct dirent *child;
|
|
|
|
|
|
|
|
// Walk sensor directory for *_fru_type nodes and read them
|
|
|
|
rewinddir(group->dir);
|
|
|
|
while ((child = readdir(group->dir))) {
|
|
|
|
if (child->d_name[0] == '.')
|
|
|
|
continue; // Skip hidden, ".", ".."
|
|
|
|
|
|
|
|
if (!strstr(child->d_name, "_fru_type"))
|
|
|
|
continue; // Not what we're looking for
|
|
|
|
|
|
|
|
// Read FRU
|
|
|
|
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", group->dir_path, child->d_name);
|
|
|
|
int type_fd = open(pathbuf, O_RDONLY);
|
|
|
|
if (type_fd < 0)
|
|
|
|
goto skip_sensor;
|
|
|
|
|
|
|
|
char fru_str[2 + 1 /* strlen("00") */];
|
|
|
|
ssize_t n;
|
|
|
|
if ((n = read(type_fd, fru_str, sizeof(fru_str) - 1)) < 0) {
|
|
|
|
close(type_fd);
|
|
|
|
goto skip_sensor;
|
|
|
|
}
|
|
|
|
fru_str[n] = '\0';
|
|
|
|
close(type_fd);
|
|
|
|
|
|
|
|
// Convert FRU to number
|
|
|
|
char *endptr;
|
|
|
|
long fru = strtol(fru_str, &endptr, 10);
|
|
|
|
if (*endptr != '\n' && *endptr != '\0')
|
|
|
|
goto skip_sensor;
|
|
|
|
|
|
|
|
// See if FRU matches
|
|
|
|
if (fru != desired_fru)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Open fd to this sensor's input file and append to list
|
|
|
|
uint8_t num;
|
|
|
|
sscanf(child->d_name, "temp%" SCNu8 "_fru_type", &num);
|
|
|
|
|
|
|
|
snprintf(pathbuf, sizeof(pathbuf), "%s/temp%" PRIu8 "_input", group->dir_path, num);
|
|
|
|
int sensor_fd = open(pathbuf, O_RDONLY);
|
|
|
|
if (sensor_fd < 0)
|
|
|
|
goto skip_sensor;
|
|
|
|
|
|
|
|
vec_int_push_back(list, sensor_fd);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
skip_sensor:
|
|
|
|
syslog(LOG_INFO, "Failed to read sensor %s: %m.\n", child->d_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize sensors and platform PWM controller
|
|
|
|
*/
|
|
|
|
struct controller_state *controller_init(void) {
|
|
|
|
struct controller_state *state = calloc(1, sizeof(struct controller_state));
|
|
|
|
if (!state) {
|
|
|
|
syslog(LOG_ERR, "Unable to allocate program state: %m.\n");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!vec_int_init(&state->zones[ZONE_CPU0].sensors, 50, close_destructor) ||
|
|
|
|
!vec_int_init(&state->zones[ZONE_CPU1].sensors, 50, close_destructor) ||
|
|
|
|
!vec_int_init(&state->zones[ZONE_CHASSIS].sensors, 50, close_destructor)) {
|
|
|
|
syslog(LOG_ERR, "Unable to allocate sensor lists: %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Select fan curves to use for each zone
|
|
|
|
state->zones[ZONE_CPU0].curve = zone_cpu0_curve;
|
|
|
|
state->zones[ZONE_CPU0].curve_size = ARRAY_SIZE(zone_cpu0_curve);
|
|
|
|
state->zones[ZONE_CPU0].curve_pos = ARRAY_SIZE(zone_cpu0_curve) - 1;
|
|
|
|
state->zones[ZONE_CPU0].next_curve_pos = 0;
|
|
|
|
state->zones[ZONE_CPU0].next_curve_count = 0;
|
|
|
|
|
|
|
|
state->zones[ZONE_CPU1].curve = zone_cpu1_curve;
|
|
|
|
state->zones[ZONE_CPU1].curve_size = ARRAY_SIZE(zone_cpu1_curve);
|
|
|
|
state->zones[ZONE_CPU1].curve_pos = ARRAY_SIZE(zone_cpu1_curve) - 1;
|
|
|
|
state->zones[ZONE_CPU1].next_curve_pos = 0;
|
|
|
|
state->zones[ZONE_CPU1].next_curve_count = 0;
|
|
|
|
|
|
|
|
state->zones[ZONE_CHASSIS].curve = zone_chassis_curve;
|
|
|
|
state->zones[ZONE_CHASSIS].curve_size = ARRAY_SIZE(zone_chassis_curve);
|
|
|
|
state->zones[ZONE_CHASSIS].curve_pos = ARRAY_SIZE(zone_chassis_curve) - 1;
|
|
|
|
state->zones[ZONE_CHASSIS].next_curve_pos = 0;
|
|
|
|
state->zones[ZONE_CHASSIS].next_curve_count = 0;
|
|
|
|
|
|
|
|
// Open sensor groups
|
|
|
|
if (!open_sensor_group(cpu0_sensors_path, &state->occ0)) {
|
|
|
|
syslog(LOG_ERR, "Unable to open OCC sensors for CPU0: %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!open_sensor_group(cpu1_sensors_path, &state->occ1))
|
|
|
|
syslog(LOG_INFO, "Unable to open OCC sensors for CPU1, assuming single socket system (%m).\n");
|
|
|
|
else
|
|
|
|
state->flags |= STATE_FLAG_CPU1_PRESENT;
|
|
|
|
|
|
|
|
|
|
|
|
// Populate zone sensor lists
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CPU0].sensors, &state->occ0, FRU_TYPE_CORE)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for CPU0: %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_DIMM)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC0): %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ0, FRU_TYPE_VRM_VDD)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC0): %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If OCC1 is present, populate its sensors too
|
|
|
|
if (state->flags & STATE_FLAG_CPU1_PRESENT) {
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CPU1].sensors, &state->occ1, FRU_TYPE_CORE)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for CPU1: %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_DIMM)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (DIMM, OCC1): %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!populate_sensor_list(&state->zones[ZONE_CHASSIS].sensors, &state->occ1, FRU_TYPE_VRM_VDD)) {
|
|
|
|
syslog(LOG_ERR, "Unable to populate sensor list for Chassis (VRM_VDD, OCC1): %m.\n");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return state;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
// Cleanup any members that were initialized
|
|
|
|
if (state->occ0.dir)
|
|
|
|
closedir(state->occ0.dir);
|
|
|
|
if (state->occ1.dir)
|
|
|
|
closedir(state->occ1.dir);
|
|
|
|
for (size_t i=0; i<NUM_ZONES; i++) {
|
|
|
|
if (state->zones[i].sensors.data)
|
|
|
|
vec_int_destroy(&state->zones[i].sensors);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void controller_destroy(struct controller_state *state) {
|
|
|
|
closedir(state->occ0.dir);
|
|
|
|
|
|
|
|
vec_int_destroy(&state->zones[ZONE_CPU0].sensors);
|
|
|
|
vec_int_destroy(&state->zones[ZONE_CHASSIS].sensors);
|
|
|
|
|
|
|
|
if (state->flags & STATE_FLAG_CPU1_PRESENT) {
|
|
|
|
closedir(state->occ1.dir);
|
|
|
|
vec_int_destroy(&state->zones[ZONE_CPU1].sensors);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool get_next_mode(int sigfd, bool cur_mode) {
|
|
|
|
// select(2) on the signalfd or timeout
|
|
|
|
fd_set fds;
|
|
|
|
FD_ZERO(&fds);
|
|
|
|
FD_SET(sigfd, &fds);
|
|
|
|
struct timeval timeout = { .tv_usec = POLL_DELAY_US };
|
|
|
|
if (select(sigfd + 1, &fds, NULL, NULL, &timeout) < 0)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
if (FD_ISSET(sigfd, &fds)) {
|
|
|
|
struct signalfd_siginfo siginfo;
|
|
|
|
if (read(sigfd, &siginfo, sizeof(siginfo)) < 0)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
if (siginfo.ssi_signo == SIGUSR1)
|
|
|
|
return false; // Disable fan control
|
|
|
|
else if (siginfo.ssi_signo == SIGUSR2)
|
|
|
|
return true; // Enable fan control
|
|
|
|
else
|
|
|
|
syslog(LOG_WARNING, "Unknown signal received (%d). Ignoring.\n", siginfo.ssi_signo);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Timeout reached or unknown signal, return current mode
|
|
|
|
return cur_mode;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
syslog(LOG_ERR, "Error encountered while polling: %m. Disabling fan control.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
openlog("fan-daemon", LOG_NDELAY, LOG_USER);
|
|
|
|
|
|
|
|
// Find PWM provider
|
|
|
|
for (size_t i=0; i<NUM_PWM_PROVIDERS; i++) {
|
|
|
|
if (pwm_providers[i] && pwm_providers[i]->probe()) {
|
|
|
|
pwm_provider = pwm_providers[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!pwm_provider) {
|
|
|
|
syslog(LOG_ERR, "Failed to autodetect platform! Exiting.\n");
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
//struct controller_state *state = controller_init();
|
|
|
|
//print_zones(state);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Program state is controlled via signals.
|
|
|
|
* SIGUSR1 - Fan control disabled, max out all zones. Used during IPL.
|
|
|
|
* SIGUSR2 - Fan control enabled, read OCC sensors and follow curve.
|
|
|
|
*
|
|
|
|
* Create a signalfd to receive these.
|
|
|
|
*/
|
|
|
|
sigset_t ss;
|
|
|
|
sigemptyset(&ss);
|
|
|
|
sigaddset(&ss, SIGUSR1);
|
|
|
|
sigaddset(&ss, SIGUSR2);
|
|
|
|
int sigfd = signalfd(-1, &ss, 0);
|
|
|
|
if (sigfd < 0) {
|
|
|
|
syslog(LOG_ERR, "Failed to create signalfd: %m. Exiting.\n");
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now that we can receive them via a signalfd, block them
|
|
|
|
if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0) {
|
|
|
|
syslog(LOG_ERR, "Failed to block signals: %m. Exiting.\n");
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool enable_control = false;
|
|
|
|
bool fault = false;
|
|
|
|
struct controller_state *state = NULL;
|
|
|
|
for(;;) {
|
|
|
|
// Get next mode
|
|
|
|
bool next = get_next_mode(sigfd, enable_control) && !fault;
|
|
|
|
|
|
|
|
// Check if mode changed and perform switch
|
|
|
|
if (next != enable_control) {
|
|
|
|
enable_control = next;
|
|
|
|
if (next) {
|
|
|
|
// Disable->Enable, re-initalize controller state
|
|
|
|
dbg_printf("Enabling fan control!\n");
|
|
|
|
if (!(state = controller_init())) {
|
|
|
|
syslog(LOG_ERR, "Failed to allocate state! Disabling fan control.\n");
|
|
|
|
enable_control = false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Enable->Disable, destroy controller state
|
|
|
|
dbg_printf("Disabling fan control!\n");
|
|
|
|
controller_destroy(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Clear any previous faults
|
|
|
|
fault = false;
|
|
|
|
|
|
|
|
// Control fans
|
|
|
|
if (enable_control) {
|
|
|
|
// Run control algorithm on all zones
|
|
|
|
if (!update_zone(state, ZONE_CPU0))
|
|
|
|
fault = true;
|
|
|
|
|
|
|
|
if (state->flags & STATE_FLAG_CPU1_PRESENT)
|
|
|
|
if (!update_zone(state, ZONE_CPU1))
|
|
|
|
fault = true;
|
|
|
|
|
|
|
|
if (!update_zone(state, ZONE_CHASSIS))
|
|
|
|
fault = true;
|
|
|
|
} else {
|
|
|
|
// Fan control disabled, max out all fans
|
|
|
|
pwm_provider->set_zone_speed(ZONE_CPU0, 255);
|
|
|
|
pwm_provider->set_zone_speed(ZONE_CPU1, 255);
|
|
|
|
pwm_provider->set_zone_speed(ZONE_CHASSIS, 255);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|