Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logger: add watchdog to boost its priority in case of a busy-looping task #9404

Merged
merged 7 commits into from
May 18, 2018
1 change: 1 addition & 0 deletions src/modules/logger/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ px4_add_module(
log_writer.cpp
log_writer_file.cpp
log_writer_mavlink.cpp
watchdog.cpp
DEPENDS
platforms__common
modules__uORB
Expand Down
7 changes: 7 additions & 0 deletions src/modules/logger/log_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ class LogWriter
return 0;
}

pthread_t thread_id_file() const
{
if (_log_writer_file) { return _log_writer_file->thread_id(); }

return (pthread_t)0;
}


/**
* Indicate to the underlying backend whether future write_message() calls need a reliable
Expand Down
9 changes: 8 additions & 1 deletion src/modules/logger/log_writer_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
#include <systemlib/hardfault_log.h>
#endif /* __PX4_NUTTX */

using namespace time_literals;


namespace px4
{
namespace logger
Expand Down Expand Up @@ -229,6 +232,7 @@ void LogWriterFile::run()

int poll_count = 0;
int written = 0;
hrt_abstime last_fsync = hrt_absolute_time();

while (true) {
size_t available = 0;
Expand Down Expand Up @@ -263,12 +267,15 @@ void LogWriterFile::run()
written = ::write(_fd, read_ptr, available);
perf_end(_perf_write);

const hrt_abstime now = hrt_absolute_time();

/* call fsync periodically to minimize potential loss of data */
if (++poll_count >= 100) {
if (++poll_count >= 100 || now - last_fsync > 1_s) {
perf_begin(_perf_fsync);
::fsync(_fd);
perf_end(_perf_fsync);
poll_count = 0;
last_fsync = now;
}

if (written < 0) {
Expand Down
2 changes: 2 additions & 0 deletions src/modules/logger/log_writer_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ class LogWriterFile
return _need_reliable_transfer;
}

pthread_t thread_id() const { return _thread; }

private:
static void *run_helper(void *);

Expand Down
90 changes: 66 additions & 24 deletions src/modules/logger/logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <px4_config.h>
#include "logger.h"
#include "messages.h"
#include "watchdog.h"

#include <dirent.h>
#include <sys/stat.h>
Expand Down Expand Up @@ -88,10 +89,24 @@

using namespace px4::logger;


struct timer_callback_data_s {
px4_sem_t semaphore;

watchdog_data_t watchdog_data;
volatile bool watchdog_triggered = false;
};

/* This is used to schedule work for the logger (periodic scan for updated topics) */
static void timer_callback(void *arg)
{
px4_sem_t *semaphore = (px4_sem_t *)arg;
/* Note: we are in IRQ context here (on NuttX) */

timer_callback_data_s *data = (timer_callback_data_s *)arg;

if (watchdog_update(data->watchdog_data)) {
data->watchdog_triggered = true;
}

/* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast
* as the timer_callback here increases the semaphore count, the counter would increase unbounded,
Expand All @@ -101,11 +116,12 @@ static void timer_callback(void *arg)
* multiple iterations at once, the next time it's scheduled). */
int semaphore_value;

if (px4_sem_getvalue(semaphore, &semaphore_value) == 0 && semaphore_value > 1) {
if (px4_sem_getvalue(&data->semaphore, &semaphore_value) == 0 && semaphore_value > 1) {
return;
}

px4_sem_post(semaphore);
px4_sem_post(&data->semaphore);

}


Expand Down Expand Up @@ -922,12 +938,11 @@ void Logger::run()
}

/* init the update timer */
struct hrt_call timer_call;
memset(&timer_call, 0, sizeof(hrt_call));
px4_sem_t timer_semaphore;
px4_sem_init(&timer_semaphore, 0, 0);
struct hrt_call timer_call{};
timer_callback_data_s timer_callback_data;
px4_sem_init(&timer_callback_data.semaphore, 0, 0);
/* timer_semaphore use case is a signal */
px4_sem_setprotocol(&timer_semaphore, SEM_PRIO_NONE);
px4_sem_setprotocol(&timer_callback_data.semaphore, SEM_PRIO_NONE);

int polling_topic_sub = -1;

Expand All @@ -939,7 +954,18 @@ void Logger::run()
}

} else {
hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &timer_semaphore);

if (_writer.backend() & LogWriter::BackendFile) {

const pid_t pid_self = getpid();
const pthread_t writer_thread = _writer.thread_id_file();

// sched_note_start is already called from pthread_create and task_create,
// which means we can expect to find the tasks in system_load.tasks, as required in watchdog_initialize
watchdog_initialize(pid_self, writer_thread, timer_callback_data.watchdog_data);
}

hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &timer_callback_data);
}

// check for new subscription data
Expand Down Expand Up @@ -976,7 +1002,7 @@ void Logger::run()

} else {
// delayed stop: we measure the process loads and then stop
initialize_load_output();
initialize_load_output(PrintLoadReason::Postflight);
_should_stop_file_log = true;
}
}
Expand Down Expand Up @@ -1015,6 +1041,11 @@ void Logger::run()
}


if (timer_callback_data.watchdog_triggered) {
timer_callback_data.watchdog_triggered = false;
initialize_load_output(PrintLoadReason::Watchdog);
}


const hrt_abstime loop_time = hrt_absolute_time();

Expand All @@ -1026,12 +1057,12 @@ void Logger::run()

if (_should_stop_file_log) {
_should_stop_file_log = false;
write_load_output(false);
write_load_output();
stop_log_file();
continue; // skip to next loop iteration

} else {
write_load_output(true);
write_load_output();
}
}

Expand Down Expand Up @@ -1200,14 +1231,14 @@ void Logger::run()
* And on linux this is quite accurate as well, but under NuttX it is not accurate,
* because usleep() has only a granularity of CONFIG_MSEC_PER_TICK (=1ms).
*/
while (px4_sem_wait(&timer_semaphore) != 0);
while (px4_sem_wait(&timer_callback_data.semaphore) != 0);
}
}

stop_log_file();

hrt_cancel(&timer_call);
px4_sem_destroy(&timer_semaphore);
px4_sem_destroy(&timer_callback_data.semaphore);

// stop the writer thread
_writer.thread_stop();
Expand Down Expand Up @@ -1482,7 +1513,7 @@ void Logger::start_log_file()

_start_time_file = hrt_absolute_time();

initialize_load_output();
initialize_load_output(PrintLoadReason::Preflight);
}

void Logger::stop_log_file()
Expand Down Expand Up @@ -1518,7 +1549,7 @@ void Logger::start_log_mavlink()
_writer.unselect_write_backend();
_writer.notify();

initialize_load_output();
initialize_load_output(PrintLoadReason::Preflight);
}

void Logger::stop_log_mavlink()
Expand Down Expand Up @@ -1576,18 +1607,26 @@ void Logger::print_load_callback(void *user)
return;
}

if (callback_data->preflight) {
perf_name = "perf_top_preflight";

} else {
perf_name = "perf_top_postflight";
switch (callback_data->logger->_print_load_reason) {
case PrintLoadReason::Preflight:
perf_name = "perf_top_preflight";
break;
case PrintLoadReason::Postflight:
perf_name = "perf_top_postflight";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great!

break;
case PrintLoadReason::Watchdog:
perf_name = "perf_top_watchdog";
break;
default:
perf_name = "perf_top";
break;
}

callback_data->logger->write_info_multiple(perf_name, callback_data->buffer, callback_data->counter != 0);
++callback_data->counter;
}

void Logger::initialize_load_output()
void Logger::initialize_load_output(PrintLoadReason reason)
{
perf_callback_data_t callback_data;
callback_data.logger = this;
Expand All @@ -1599,16 +1638,19 @@ void Logger::initialize_load_output()
// this will not yet print anything
print_load_buffer(curr_time, buffer, sizeof(buffer), print_load_callback, &callback_data, &_load);
_next_load_print = curr_time + 1000000;
_print_load_reason = reason;
}

void Logger::write_load_output(bool preflight)
void Logger::write_load_output()
{
if (_print_load_reason == PrintLoadReason::Watchdog) {
PX4_ERR("Writing watchdog data"); // this is just that we see it easily in the log
}
perf_callback_data_t callback_data = {};
char buffer[140];
callback_data.logger = this;
callback_data.counter = 0;
callback_data.buffer = buffer;
callback_data.preflight = preflight;
hrt_abstime curr_time = hrt_absolute_time();
_writer.set_need_reliable_transfer(true);
// TODO: maybe we should restrict the output to a selected backend (eg. when file logging is running
Expand Down
11 changes: 9 additions & 2 deletions src/modules/logger/logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ class Logger : public ModuleBase<Logger>

private:

enum class PrintLoadReason {
Preflight,
Postflight,
Watchdog
};

/**
* Write an ADD_LOGGED_MSG to the log for a all current subscriptions and instances
*/
Expand Down Expand Up @@ -297,12 +303,12 @@ class Logger : public ModuleBase<Logger>
/**
* initialize the output for the process load, so that ~1 second later it will be written to the log
*/
void initialize_load_output();
void initialize_load_output(PrintLoadReason reason);

/**
* write the process load, which was previously initialized with initialize_load_output()
*/
void write_load_output(bool preflight);
void write_load_output();


static constexpr size_t MAX_TOPICS_NUM = 64; /**< Maximum number of logged topics */
Expand Down Expand Up @@ -344,6 +350,7 @@ class Logger : public ModuleBase<Logger>
will be stopped after load printing */
print_load_s _load{}; ///< process load data
hrt_abstime _next_load_print{0}; ///< timestamp when to print the process load
PrintLoadReason _print_load_reason;

// control
param_t _sdlog_profile_handle{PARAM_INVALID};
Expand Down
Loading