/*
 * ksyslog: In-kernel syslog receiver 
 * Copyright(C) 2013 Atzm WATANABE All rights reserved
 * Distributed under the GPL
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/proc_fs.h>
#include <linux/u64_stats_sync.h>
#include <linux/percpu.h>
#include <net/udp.h>
#include "compat.h"
#include "ksyslog.h"

/* Queue of received entries waiting to be flushed by the work handler. */
static struct ksyslog_queue ksyslog_queue;
/* Receiving UDP socket; stays NULL until ksyslog_init() creates it. */
static struct socket *ksyslog_rcv_sk = NULL;

/* Self-rearming flush job and the workqueue it is queued on. */
static struct delayed_work ksyslog_work;
static struct workqueue_struct *ksyslog_wq = NULL;

#ifdef CONFIG_PROC_FS
/* Handles for the "ksyslog" proc directory and its queue/size/stats files. */
static struct proc_dir_entry *ksyslog_procdir = NULL;
static struct proc_dir_entry *ksyslog_proc_queue = NULL;
static struct proc_dir_entry *ksyslog_proc_size = NULL;
static struct proc_dir_entry *ksyslog_proc_stats = NULL;
#endif

/* Module parameters (defaults below). */
static char *ksyslog_host = "0.0.0.0";            /* IPv4 address the socket is bound to */
static ushort ksyslog_port = 514;                 /* UDP port the socket is bound to */
static char *ksyslog_path = "/var/log/ksyslog.log"; /* file the flush worker appends to */
static ulong ksyslog_queue_size_max = 4096;       /* maximum number of queued entries */
static ulong ksyslog_flush_interval = 45;  /* delay between flush runs, in milliseconds */

/* Taken by the flush worker around each write of an entry to the log file. */
static DEFINE_SPINLOCK(ksyslog_write_lock);

/*
 * host and port are exposed read-only (0444) because they are only consumed
 * once, when the socket is bound in ksyslog_init(); the remaining parameters
 * are exposed with mode 0644.
 */
module_param(ksyslog_host, charp, 0444);
module_param(ksyslog_port, ushort, 0444);
module_param(ksyslog_path, charp, 0644);
module_param(ksyslog_queue_size_max, ulong, 0644);
module_param(ksyslog_flush_interval, ulong, 0644);

/*
 * Bring @queue into its empty initial state: zero the structure, set up the
 * list head, the lock and the size counter, then allocate the per-CPU
 * statistics.
 *
 * Returns 0 on success, -ENOMEM if the per-CPU allocation fails.
 */
static int
ksyslog_queue_init(struct ksyslog_queue *queue)
{
	memset(queue, 0, sizeof(*queue));

	INIT_LIST_HEAD(&queue->head);
	spin_lock_init(&queue->lock);
	atomic64_set(&queue->size, 0);

	queue->stats = alloc_percpu(struct ksyslog_stats);
	if (likely(queue->stats != NULL))
		return 0;

	return -ENOMEM;
}

/*
 * Release the per-CPU statistics of @queue (if any were allocated) and clear
 * the pointer so a second call is harmless.
 */
static void
ksyslog_queue_uninit(struct ksyslog_queue *queue)
{
	if (unlikely(!queue->stats))
		return;

	free_percpu(queue->stats);
	queue->stats = NULL;
}

/*
 * Close @file with filp_close() while the address limit is temporarily
 * switched to get_ds(); the previous limit is restored before returning.
 *
 * Returns the result of filp_close().
 */
static int
ksyslog_close(struct file *file)
{
	mm_segment_t saved_fs = get_fs();
	int ret;

	set_fs(get_ds());
	ret = filp_close(file, NULL);
	set_fs(saved_fs);

	return ret;
}

/*
 * Open the log file at @path for appending, creating it with mode 0600 when
 * it does not exist yet.
 *
 * kern_path() is used purely as an existence probe.  On success it hands back
 * a referenced struct path, which must be dropped with path_put(); otherwise
 * a dentry/vfsmount reference would be leaked on every call.
 *
 * Returns the opened file, or an ERR_PTR():
 *   - whatever filp_open() reported,
 *   - -EISDIR if @path names a directory,
 *   - -EIO if the file position is negative.
 */
static struct file *
ksyslog_open(const char *path)
{
	struct file *file;
	struct path ppath;
	mm_segment_t oldfs;

	oldfs = get_fs();
	set_fs(get_ds());

	if (unlikely(kern_path(path, LOOKUP_OPEN|LOOKUP_FOLLOW, &ppath))) {
		/* lookup failed: try to create the file */
		file = filp_open(path, O_CREAT|O_WRONLY|O_APPEND|O_LARGEFILE, 0600);
	} else {
		/* only existence mattered; release the reference kern_path() took */
		path_put(&ppath);
		file = filp_open(path, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
	}

	if (unlikely(IS_ERR(file)))
		goto out;

	compat_fsnotify_open(file);

	if (unlikely(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
		ksyslog_close(file);
		file = ERR_PTR(-EISDIR);
		goto out;
	}

	if (unlikely(file->f_pos < 0)) {
		ksyslog_close(file);
		file = ERR_PTR(-EIO);
		goto out;
	}

out:
	set_fs(oldfs);
	return file;
}

/*
 * Write @length bytes from the kernel buffer @buf to @file at its current
 * position using vfs_write().  The address limit is switched to get_ds() for
 * the duration of the call so the kernel pointer is accepted, and restored
 * afterwards.
 *
 * Returns the value returned by vfs_write().
 */
static int
ksyslog_write(struct file *file, const char *buf, const size_t length)
{
	mm_segment_t saved_fs = get_fs();
	int written;

	set_fs(get_ds());
	written = vfs_write(file, (__force void __user *)buf, length, &file->f_pos);
	set_fs(saved_fs);

	return written;
}

static void
ksyslog_drop_warning(const struct ksyslog_entry *entry)
{
	net_warn_ratelimited("ksyslog: dropped: %llu %s.%s %u.%u.%u.%u %.*s\n",
			     timeval_to_ns(&entry->tv) / 1000 / 1000 / 1000,
			     ksyslog_facility_str(entry->facility),
			     ksyslog_severity_str(entry->severity),
			     entry->saddr.addr8[0], entry->saddr.addr8[1],
			     entry->saddr.addr8[2], entry->saddr.addr8[3],
			     (int)entry->length, entry->data);
}

static struct ksyslog_entry *
ksyslog_entry_create(const struct sk_buff *skb,
		     const struct iphdr *iph, const struct udphdr *udph)
{
	struct ksyslog_entry *entry;
	unsigned int priority, facility, severity, month, day, hour, minute, second;
	unsigned char *start, month_s[4];
	struct tm tm;
	int length, i;

	if (sscanf(skb->data, "<%3u>%3s %2u %2u:%2u:%2u ",
		   &priority, month_s, &day, &hour, &minute, &second) != 6)
		return ERR_PTR(-EINVAL);

	start = memchr(skb->data, '>', 5);
	if (start == NULL)
		return ERR_PTR(-EINVAL);
	start++;

	facility = priority >> 3;
	severity = priority & 7;

	if (facility >= __KSYSLOG_F_MAX)
		return ERR_PTR(-EINVAL);
	if (severity >= __KSYSLOG_S_MAX)
		return ERR_PTR(-EINVAL);

	month = ksyslog_month_num(month_s);
	if (!month)
		return ERR_PTR(-EINVAL);
	if (day > 31)
		return ERR_PTR(-EINVAL);
	if (hour > 23)
		return ERR_PTR(-EINVAL);
	if (minute > 59)
		return ERR_PTR(-EINVAL);
	if (second > 59)
		return ERR_PTR(-EINVAL);

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (unlikely(entry == NULL))
		return ERR_PTR(-ENOMEM);

	length = skb->len - (start - skb->data);
	entry->data = kzalloc(length, GFP_ATOMIC);
	if (unlikely(entry->data == NULL)) {
		kfree(entry);
		return ERR_PTR(-ENOMEM);
	}

	if (skb->tstamp.tv64)
		entry->tv = ktime_to_timeval(skb->tstamp);
	else
		do_gettimeofday(&entry->tv);

	time_to_tm(entry->tv.tv_sec, 0, &tm);
	entry->time = mktime(tm.tm_year + 1900, month, day, hour, minute, second);

	entry->priority = priority;
	entry->facility = facility;
	entry->severity = severity;

	entry->daddr.addr32 = iph->daddr;
	entry->saddr.addr32 = iph->saddr;

	entry->dport = udph->dest;
	entry->sport = udph->source;

	entry->length = length;
	memcpy(entry->data, start, length);

	for (i = 0; i < length; i++)
		if (unlikely(entry->data[i] == '\n'))
			entry->data[i] = ' ';

	return entry;
}

/*
 * RCU callback (also callable directly): free the entry that embeds @head,
 * together with its message buffer.
 */
static void
ksyslog_entry_free(struct rcu_head *head)
{
	struct ksyslog_entry *victim;

	victim = container_of(head, struct ksyslog_entry, rcu);

	kfree(victim->data);
	kfree(victim);
}

/*
 * Append @entry to the tail of @queue unless the queue already holds
 * ksyslog_queue_size_max entries.
 *
 * Returns 0 when the entry was queued, -ENOBUFS when the queue is full
 * (the entry is left untouched in that case).
 */
static int
ksyslog_entry_add(struct ksyslog_queue *queue, struct ksyslog_entry *entry)
{
	if (likely(atomic64_read(&queue->size) < ksyslog_queue_size_max)) {
		list_add_tail_rcu(&entry->list, &queue->head);
		/* the counter must never run past the configured limit */
		WARN_ON(atomic64_inc_return(&queue->size) > ksyslog_queue_size_max);
		return 0;
	}

	return -ENOBUFS;
}

/*
 * Unlink @entry from @queue and decrement the queue size.
 *
 * When @free is true the entry is additionally handed to call_rcu() for
 * release; otherwise the caller keeps ownership and frees it later.
 */
static void
ksyslog_entry_del(struct ksyslog_queue *queue, struct ksyslog_entry *entry, bool free)
{
	/* a negative count would mean more removals than insertions */
	WARN_ON(atomic64_dec_return(&queue->size) < 0);
	list_del_rcu(&entry->list);

	if (!free)
		return;

	call_rcu(&entry->rcu, ksyslog_entry_free);
}

static void
ksyslog_entry_destroy(struct ksyslog_queue *queue)
{
	struct ksyslog_entry *entry, *next;

	list_for_each_entry_safe(entry, next, &queue->head, list)
		ksyslog_entry_del(queue, entry, true);
}

/*
 * Render @entry as one log line:
 *   "<seconds> <facility>.<severity> <a.b.c.d> <message>\n"
 *
 * The line is allocated with kasprintf(), which sizes the buffer from the
 * formatted output itself, so the result cannot overflow regardless of how
 * long the facility/severity names or the timestamp turn out to be.
 *
 * On success *@buf points to the NUL-terminated line (caller must kfree() it)
 * and the line length, excluding the terminator, is returned.  On allocation
 * failure *@buf is NULL and -ENOMEM is returned.
 */
static int
ksyslog_entry_format(char **buf, const struct ksyslog_entry *entry)
{
	*buf = kasprintf(GFP_ATOMIC, "%llu %s.%s %u.%u.%u.%u %.*s\n",
			 timeval_to_ns(&entry->tv) / 1000 / 1000 / 1000,
			 ksyslog_facility_str(entry->facility),
			 ksyslog_severity_str(entry->severity),
			 entry->saddr.addr8[0], entry->saddr.addr8[1],
			 entry->saddr.addr8[2], entry->saddr.addr8[3],
			 (int)entry->length, entry->data);
	if (unlikely(*buf == NULL))
		return -ENOMEM;

	return strlen(*buf);
}

/*
 * Format @entry as a log line and append it to @file.
 *
 * Returns true only when the whole line was written; false when formatting
 * failed or the write was short or returned an error.
 */
static bool
ksyslog_entry_write(struct file *file, struct ksyslog_entry *entry)
{
	char *line;
	int len, written;

	len = ksyslog_entry_format(&line, entry);
	if (unlikely(len < 0))
		return false;

	written = ksyslog_write(file, line, len);
	kfree(line);

	return written == len;
}

/*
 * Queue the flush work to run after @timer milliseconds.  The delay is
 * converted to jiffies with integer arithmetic (timer * HZ / 1000).
 */
static void
ksyslog_work_register(unsigned long timer)
{
	unsigned long delay = timer * HZ / 1000;

	queue_delayed_work(ksyslog_wq, &ksyslog_work, delay);
}

/*
 * Cancel the flush work via cancel_delayed_work_sync(), so that no flush is
 * pending or running once this returns.
 */
static void
ksyslog_work_unregister(void)
{
	cancel_delayed_work_sync(&ksyslog_work);
}

static void
ksyslog_work_handler(struct work_struct *work)
{
	struct file *file = NULL;
	struct ksyslog_entry *entry;

	file = ksyslog_open(ksyslog_path);
	if (unlikely(IS_ERR(file)))
		goto out;

	while (true) {
		bool write_ok;

		spin_lock_bh(&ksyslog_queue.lock);
		entry = list_first_or_null_rcu(&ksyslog_queue.head,
					       struct ksyslog_entry, list);
		if (!entry) {
			spin_unlock_bh(&ksyslog_queue.lock);
			break;
		}
		ksyslog_entry_del(&ksyslog_queue, entry, false);
		spin_unlock_bh(&ksyslog_queue.lock);

		spin_lock(&ksyslog_write_lock);
		write_ok = ksyslog_entry_write(file, entry);
		spin_unlock(&ksyslog_write_lock);

		if (likely(write_ok)) {
			ksyslog_stats_add_write(&ksyslog_queue, entry->length);
		} else {
			ksyslog_stats_add_drop(&ksyslog_queue, entry->length);
			ksyslog_drop_warning(entry);
		}

		call_rcu(&entry->rcu, ksyslog_entry_free);
	}

	ksyslog_close(file);

out:
	ksyslog_work_register(ksyslog_flush_interval);
}

/*
 * encap_rcv hook of the receiving UDP socket: turn the datagram in @skb into
 * a queue entry.
 *
 * Accounting:
 *   - header parse failure (-EINVAL)            -> "discard" statistics
 *   - linearize/pull/allocation failure         -> "drop" statistics
 *   - queue full                                -> "drop" statistics + warning
 *
 * The skb is always consumed and 0 is always returned.
 */
static int
ksyslog_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ksyslog_entry *entry;
	struct udphdr *udph;
	struct iphdr *iph;
	int err;

	if (unlikely(skb_linearize(skb)))
		goto drop_skb;

	/* grab both headers before the data pointer moves past the UDP header */
	iph = ip_hdr(skb);
	udph = udp_hdr(skb);

	if (unlikely(!skb_pull(skb, sizeof(*udph))))
		goto drop_skb;

	entry = ksyslog_entry_create(skb, iph, udph);
	if (unlikely(IS_ERR(entry))) {
		if (PTR_ERR(entry) != -EINVAL)
			goto drop_skb;

		ksyslog_stats_add_discard(&ksyslog_queue, skb->len);
		goto out;
	}

	spin_lock_bh(&ksyslog_queue.lock);
	err = ksyslog_entry_add(&ksyslog_queue, entry);
	spin_unlock_bh(&ksyslog_queue.lock);

	if (unlikely(err)) {
		ksyslog_stats_add_drop(&ksyslog_queue, entry->length);
		ksyslog_drop_warning(entry);
		/* never became visible on the list, so it can be freed directly */
		ksyslog_entry_free(&entry->rcu);
	}
	goto out;

drop_skb:
	ksyslog_stats_add_drop(&ksyslog_queue, skb->len);
out:
	consume_skb(skb);
	return 0;
}

#ifdef CONFIG_PROC_FS
/*
 * seq_file start: enter an RCU read-side section and return the list node at
 * index *@pos of the list whose head is stored in seq->private, or NULL when
 * the list has fewer nodes.  The RCU section is left in the stop callback.
 */
static void *
ksyslog_rculist_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct list_head *head = seq->private;
	struct list_head *cursor;
	loff_t remaining = *pos;

	rcu_read_lock();

	__list_for_each_rcu(cursor, head) {
		if (remaining == 0)
			return cursor;
		remaining--;
	}

	return NULL;
}

/*
 * seq_file next: advance *@pos and return the node following @v, or NULL
 * once the walk has come back around to the list head (seq->private).
 */
static void *
ksyslog_rculist_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *current_node = v;
	struct list_head *following = rcu_dereference(current_node->next);

	++(*pos);

	if (following == seq->private)
		return NULL;

	return following;
}

/*
 * seq_file stop: leave the RCU read-side section that
 * ksyslog_rculist_seq_start() entered.
 */
static void
ksyslog_rculist_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}

static int
ksyslog_queue_seq_show(struct seq_file *seq, void *v)
{
	const struct ksyslog_entry *entry = list_entry_rcu(v, struct ksyslog_entry, list);

	seq_printf(seq, "%llu %s.%s %u.%u.%u.%u %.*s\n",
		   timeval_to_ns(&entry->tv) / 1000 / 1000 / 1000,
		   ksyslog_facility_str(entry->facility),
		   ksyslog_severity_str(entry->severity),
		   entry->saddr.addr8[0], entry->saddr.addr8[1],
		   entry->saddr.addr8[2], entry->saddr.addr8[3],
		   (int)entry->length, entry->data);

	return 0;
}

static struct seq_operations ksyslog_queue_seq_ops = {
	.start = ksyslog_rculist_seq_start,
	.next  = ksyslog_rculist_seq_next,
	.stop  = ksyslog_rculist_seq_stop,
	.show  = ksyslog_queue_seq_show,
};

static int
ksyslog_queue_seq_open(struct inode *inode, struct file *file)
{
	int err = seq_open(file, &ksyslog_queue_seq_ops);

	if (!err)
		((struct seq_file *)file->private_data)->private = PDE_DATA(inode);

	return err;
}

static struct file_operations ksyslog_queue_fops = {
	.owner   = THIS_MODULE,
	.open    = ksyslog_queue_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * seq_file show for the "size" proc file: print the current number of queued
 * entries followed by a newline.  Always returns 0.
 *
 * atomic64_read() yields a 64-bit value whose C type differs between
 * architectures, so it is cast explicitly to match the format specifier.
 */
static int
ksyslog_size_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%lld\n", (long long)atomic64_read(&ksyslog_queue.size));
	return 0;
}

/*
 * open() for the "size" proc file: single-record seq_file rendered by
 * ksyslog_size_seq_show().  Returns the result of single_open().
 */
static int
ksyslog_size_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ksyslog_size_seq_show, PDE_DATA(inode));
}

static int
ksyslog_stats_seq_show(struct seq_file *seq, void *v)
{
	int i;
	struct ksyslog_stats stats;

	memset(&stats, 0, sizeof(stats));

	for_each_possible_cpu(i) {
		const struct ksyslog_stats *percpu_stats;
		struct ksyslog_stats local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(ksyslog_queue.stats, i);

		do {
			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

		stats.write_bytes += local_stats.write_bytes;
		stats.write_packets += local_stats.write_packets;
		stats.drop_bytes += local_stats.drop_bytes;
		stats.drop_packets += local_stats.drop_packets;
		stats.discard_bytes += local_stats.discard_bytes;
		stats.discard_packets += local_stats.discard_packets;
	}

	seq_puts(seq,   "{\n");
	seq_puts(seq,   "  \"write\": {\n");
	seq_printf(seq, "    \"bytes\":   \"%llu\",\n", stats.write_bytes);
	seq_printf(seq, "    \"packets\": \"%llu\"\n", stats.write_packets);
	seq_puts(seq,   "  },\n");
	seq_puts(seq,   "  \"drop\": {\n");
	seq_printf(seq, "    \"bytes\":   \"%llu\",\n", stats.drop_bytes);
	seq_printf(seq, "    \"packets\": \"%llu\"\n", stats.drop_packets);
	seq_puts(seq,   "  },\n");
	seq_puts(seq,   "  \"discard\": {\n");
	seq_printf(seq, "    \"bytes\":   \"%llu\",\n", stats.discard_bytes);
	seq_printf(seq, "    \"packets\": \"%llu\"\n", stats.discard_packets);
	seq_puts(seq,   "  }\n");
	seq_puts(seq,   "}\n");

	return 0;
}

/*
 * open() for the "stats" proc file: single-record seq_file rendered by
 * ksyslog_stats_seq_show().  Returns the result of single_open().
 */
static int
ksyslog_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ksyslog_stats_seq_show, PDE_DATA(inode));
}

static struct file_operations ksyslog_size_fops = {
	.owner   = THIS_MODULE,
	.open    = ksyslog_size_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static struct file_operations ksyslog_stats_fops = {
	.owner   = THIS_MODULE,
	.open    = ksyslog_stats_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/*
 * Remove whichever proc entries currently exist -- the three files first,
 * then the "ksyslog" directory itself -- and clear their handles.  Safe to
 * call when only some (or none) of the entries were created.
 */
static void
ksyslog_proc_destroy(void)
{
	if (ksyslog_proc_queue) {
		remove_proc_entry("queue", ksyslog_procdir);
		ksyslog_proc_queue = NULL;
	}

	if (ksyslog_proc_size) {
		remove_proc_entry("size", ksyslog_procdir);
		ksyslog_proc_size = NULL;
	}

	if (ksyslog_proc_stats) {
		remove_proc_entry("stats", ksyslog_procdir);
		ksyslog_proc_stats = NULL;
	}

	/* the directory goes last, after its children */
	if (ksyslog_procdir) {
		remove_proc_entry("ksyslog", NULL);
		ksyslog_procdir = NULL;
	}
}

/*
 * Create the "ksyslog" proc directory with its "queue", "size" and "stats"
 * files.  The queue file receives the queue's list head as private data.
 *
 * Returns 0 on success.  If any step fails, everything created so far is
 * removed again and -ENOMEM is returned.
 */
static int
ksyslog_proc_init(void)
{
	ksyslog_procdir = proc_mkdir("ksyslog", NULL);
	if (!ksyslog_procdir) {
		pr_err("ksyslog: proc_mkdir failed\n");
		goto rollback;
	}

	ksyslog_proc_queue = proc_create_data("queue", S_IRUGO, ksyslog_procdir,
					      &ksyslog_queue_fops,
					      &ksyslog_queue.head);
	if (!ksyslog_proc_queue) {
		pr_err("ksyslog: proc_create(queue) failed\n");
		goto rollback;
	}

	ksyslog_proc_size = proc_create("size", S_IRUGO, ksyslog_procdir,
					&ksyslog_size_fops);
	if (!ksyslog_proc_size) {
		pr_err("ksyslog: proc_create(size) failed\n");
		goto rollback;
	}

	ksyslog_proc_stats = proc_create("stats", S_IRUGO, ksyslog_procdir,
					 &ksyslog_stats_fops);
	if (!ksyslog_proc_stats) {
		pr_err("ksyslog: proc_create(stats) failed\n");
		goto rollback;
	}

	return 0;

rollback:
	ksyslog_proc_destroy();
	return -ENOMEM;
}
#endif

/*
 * Tear down everything ksyslog_init() may have set up.  Every step checks
 * whether its resource exists, so this also serves as the error path of a
 * partially completed initialization.
 *
 * Order: release the socket, cancel the flush work and destroy the
 * workqueue, remove the proc entries, release the queued entries, wait for
 * outstanding RCU callbacks, and finally free the per-CPU statistics.
 */
static void
ksyslog_finish(void)
{
	if (ksyslog_rcv_sk) {
		sock_release(ksyslog_rcv_sk);
		ksyslog_rcv_sk = NULL;
	}

	if (ksyslog_wq) {
		ksyslog_work_unregister();
		destroy_workqueue(ksyslog_wq);
		ksyslog_wq = NULL;
	}

#ifdef CONFIG_PROC_FS
	ksyslog_proc_destroy();
#endif

	ksyslog_entry_destroy(&ksyslog_queue);
	/* let every pending ksyslog_entry_free() callback run before going on */
	rcu_barrier();

	ksyslog_queue_uninit(&ksyslog_queue);
}

/*
 * Module entry point.  Sets up, in order: the entry queue, the proc files
 * (when CONFIG_PROC_FS is enabled), the workqueue and flush work, and a UDP
 * socket bound to ksyslog_host:ksyslog_port.  It then schedules the first
 * flush and installs ksyslog_rcv() as the socket's encap_rcv hook.
 *
 * Returns 0 on success.  On any failure everything set up so far is undone
 * through ksyslog_finish() and the error code is returned.
 */
static int __init
ksyslog_init(void)
{
	int err;
	struct sockaddr_in sin;

	err = ksyslog_queue_init(&ksyslog_queue);
	if (err)
		goto err;

#ifdef CONFIG_PROC_FS
	err = ksyslog_proc_init();
	if (err)
		goto err;
#endif

	ksyslog_wq = create_workqueue("ksyslog");
	if (ksyslog_wq == NULL) {
		pr_err("ksyslog: create_workqueue failed\n");
		err = -ENOMEM;
		goto err;
	}

	INIT_DELAYED_WORK(&ksyslog_work, ksyslog_work_handler);

	err = sock_create(AF_INET, SOCK_DGRAM, 0, &ksyslog_rcv_sk);
	if (err) {
		pr_err("ksyslog: sock_create failed\n");
		goto err;
	}

	/* zero the whole address first so no uninitialized padding is passed on */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = in_aton(ksyslog_host);
	sin.sin_port = htons(ksyslog_port);

	err = kernel_bind(ksyslog_rcv_sk, (struct sockaddr *)&sin,
			  sizeof(struct sockaddr_in));
	if (err) {
		pr_err("ksyslog: kernel_bind failed\n");
		goto err;
	}

	ksyslog_work_register(ksyslog_flush_interval);

	/* route datagrams arriving on the socket to ksyslog_rcv() */
	udp_sk(ksyslog_rcv_sk->sk)->encap_type = UDP_ENCAP_KSYSLOG;
	udp_sk(ksyslog_rcv_sk->sk)->encap_rcv = ksyslog_rcv;
	udp_encap_enable();

	return 0;

err:
	ksyslog_finish();
	return err;
}

/* Module exit point: release every resource through ksyslog_finish(). */
static void __exit
ksyslog_exit(void)
{
	ksyslog_finish();
}

/* Register the load/unload entry points and the module metadata. */
module_init(ksyslog_init);
module_exit(ksyslog_exit);

MODULE_AUTHOR("Atzm WATANABE");
MODULE_DESCRIPTION("In-kernel syslog receiver");
MODULE_LICENSE("GPL");
