From 5a6b44240a40afadba62276114c826647388830c Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 25 Aug 2023 17:33:22 -0700 Subject: [PATCH] enhancement(kubernetes_logs source): Expose `oldest_first` (#18376) * enhancement(kubernetes_logs source): Expose `oldest_first` In response to https://github.com/vectordotdev/vector/issues/18088#issuecomment-1690491190 Might close: https://github.com/vectordotdev/vector/issues/18088 Signed-off-by: Jesse Szwedko * Improve descriptions Signed-off-by: Jesse Szwedko --------- Signed-off-by: Jesse Szwedko --- src/sources/file.rs | 6 ++++- src/sources/kubernetes_logs/mod.rs | 22 ++++++++++++++----- .../components/sources/base/file.cue | 10 +++++++-- .../sources/base/kubernetes_logs.cue | 9 ++++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/sources/file.rs b/src/sources/file.rs index bb1cc2301fa360..f0fd0be30811b9 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -205,7 +205,11 @@ pub struct FileConfig { #[serde(default)] pub multiline: Option, - /// An approximate limit on the amount of data read from a single file at a given time. + /// Max amount of bytes to read from a single file before switching over to the next file. + /// **Note:** This does not apply when `oldest_first` is `true`. + /// + /// This allows distributing the reads more or less evenly across + /// the files. #[serde(default = "default_max_read_bytes")] #[configurable(metadata(docs::type_unit = "bytes"))] pub max_read_bytes: usize, diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 817bd09ad6ddfa..890d05586436bf 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -163,14 +163,18 @@ pub struct Config { #[configurable(metadata(docs::human_name = "Ignore Files Older Than"))] ignore_older_secs: Option, - /// Max amount of bytes to read from a single file before switching over - /// to the next file. 
+ /// Max amount of bytes to read from a single file before switching over to the next file. + /// **Note:** This does not apply when `oldest_first` is `true`. /// /// This allows distributing the reads more or less evenly across /// the files. #[configurable(metadata(docs::type_unit = "bytes"))] max_read_bytes: usize, + /// Instead of balancing read capacity fairly across all watched files, prioritize draining the oldest files before moving on to read data from younger files. + #[serde(default = "default_oldest_first")] + pub oldest_first: bool, + /// The maximum number of bytes a line can contain before being discarded. /// /// This protects against malformed lines or tailing incorrect files. @@ -264,6 +268,7 @@ impl Default for Config { read_from: default_read_from(), ignore_older_secs: None, max_read_bytes: default_max_read_bytes(), + oldest_first: default_oldest_first(), max_line_bytes: default_max_line_bytes(), fingerprint_lines: default_fingerprint_lines(), glob_minimum_cooldown_ms: default_glob_minimum_cooldown_ms(), @@ -516,6 +521,7 @@ struct Source { read_from: ReadFrom, ignore_older_secs: Option, max_read_bytes: usize, + oldest_first: bool, max_line_bytes: usize, fingerprint_lines: usize, glob_minimum_cooldown: Duration, @@ -593,6 +599,7 @@ impl Source { read_from: ReadFrom::from(config.read_from), ignore_older_secs: config.ignore_older_secs, max_read_bytes: config.max_read_bytes, + oldest_first: config.oldest_first, max_line_bytes: config.max_line_bytes, fingerprint_lines: config.fingerprint_lines, glob_minimum_cooldown, @@ -624,6 +631,7 @@ impl Source { read_from, ignore_older_secs, max_read_bytes, + oldest_first, max_line_bytes, fingerprint_lines, glob_minimum_cooldown, @@ -763,9 +771,7 @@ impl Source { max_line_length: max_line_bytes, ignore_not_found: true, }, - // We'd like to consume rotated pod log files first to release our file handle and let - // the space be reclaimed - oldest_first: true, + oldest_first, // We do not remove the log files, 
`kubelet` is responsible for it. remove_after: None, // The standard emitter. @@ -944,6 +950,12 @@ const fn default_max_read_bytes() -> usize { 2048 } +// We'd like to consume rotated pod log files first to release our file handle and let +// the space be reclaimed +const fn default_oldest_first() -> bool { + true +} + const fn default_max_line_bytes() -> usize { // NOTE: The below comment documents an incorrect assumption, see // https://github.com/vectordotdev/vector/issues/6967 diff --git a/website/cue/reference/components/sources/base/file.cue b/website/cue/reference/components/sources/base/file.cue index 17b32bb7648e04..bee8ec0fbd7e0d 100644 --- a/website/cue/reference/components/sources/base/file.cue +++ b/website/cue/reference/components/sources/base/file.cue @@ -228,8 +228,14 @@ base: components: sources: file: configuration: { } } max_read_bytes: { - description: "An approximate limit on the amount of data read from a single file at a given time." - required: false + description: """ + Max amount of bytes to read from a single file before switching over to the next file. + **Note:** This does not apply when `oldest_first` is `true`. + + This allows distributing the reads more or less evenly across + the files. + """ + required: false type: uint: { default: 2048 unit: "bytes" diff --git a/website/cue/reference/components/sources/base/kubernetes_logs.cue b/website/cue/reference/components/sources/base/kubernetes_logs.cue index ede1f4ca7bff5d..4d0d37c15496fa 100644 --- a/website/cue/reference/components/sources/base/kubernetes_logs.cue +++ b/website/cue/reference/components/sources/base/kubernetes_logs.cue @@ -165,8 +165,8 @@ base: components: sources: kubernetes_logs: configuration: { } max_read_bytes: { description: """ - Max amount of bytes to read from a single file before switching over - to the next file. + Max amount of bytes to read from a single file before switching over to the next file. + **Note:** This does not apply when `oldest_first` is `true`. 
This allows distributing the reads more or less evenly across the files. @@ -209,6 +209,11 @@ base: components: sources: kubernetes_logs: configuration: { } } } + oldest_first: { + description: "Instead of balancing read capacity fairly across all watched files, prioritize draining the oldest files before moving on to read data from younger files." + required: false + type: bool: default: true + } pod_annotation_fields: { description: "Configuration for how the events are enriched with Pod metadata." required: false