From 9144f9630b211908dceaafb29e59060146b9011c Mon Sep 17 00:00:00 2001
From: Cameron Sparr <cameronsparr@gmail.com>
Date: Wed, 25 May 2016 16:44:17 +0100
Subject: [PATCH] graphite parser: support multiple tag keys

closes #1272
---
 CHANGELOG.md                            |   1 +
 docs/DATA_FORMATS_INPUT.md              |  74 +++++++++-------
 plugins/parsers/graphite/parser.go      |  35 +++++---
 plugins/parsers/graphite/parser_test.go | 110 +++++++++++++++++++++++-
 4 files changed, 177 insertions(+), 43 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab81f1584a50e..1c02fbd89e2a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ time before a new metric is included by the plugin.
 - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar input plugin. Thanks @francois2metz and @cduez!
 - [#1208](https://github.com/influxdata/telegraf/pull/1208): Standardized AWS credentials evaluation & wildcard CloudWatch dimensions. Thanks @johnrengelman!
 - [#1264](https://github.com/influxdata/telegraf/pull/1264): Add SSL config options to http_response plugin.
+- [#1272](https://github.com/influxdata/telegraf/pull/1272): graphite parser: add ability to specify multiple tag keys, for consistency with influxdb parser.
 
 ### Bugfixes
 
diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md
index 7d3fbf5de47f1..2e3a479ac7e9b 100644
--- a/docs/DATA_FORMATS_INPUT.md
+++ b/docs/DATA_FORMATS_INPUT.md
@@ -186,49 +186,59 @@ name of the plugin.
 # Graphite:
 
 The Graphite data format translates graphite _dot_ buckets directly into
-telegraf measurement names, with a single value field, and without any tags. For
-more advanced options, Telegraf supports specifying "templates" to translate
+telegraf measurement names, with a single value field, and without any tags.
+By default, the separator is left as ".", but this can be changed using the
+"separator" argument. For more advanced options,
+Telegraf supports specifying "templates" to translate
 graphite buckets into Telegraf metrics.
 
-#### Separator:
-
-You can specify a separator to use for the parsed metrics.
-By default, it will leave the metrics with a "." separator.
-Setting `separator = "_"` will translate:
+Templates are of the form:
 
 ```
-cpu.usage.idle 99
-=> cpu_usage_idle value=99
+"host.mytag.mytag.measurement.measurement.field*"
 ```
 
-#### Measurement/Tag Templates:
+Where the following keywords exist:
+
+1. `measurement`: specifies that this section of the graphite bucket corresponds
+to the measurement name. This can be specified multiple times.
+2. `field`: specifies that this section of the graphite bucket corresponds
+to the field name. This can be specified multiple times.
+3. `measurement*`: specifies that all remaining elements of the graphite bucket
+correspond to the measurement name.
+4. `field*`: specifies that all remaining elements of the graphite bucket
+correspond to the field name.
+
+Any part of the template that is not a keyword is treated as a tag key. A tag
+key can also be repeated, in which case its values are joined by the separator.
+
+NOTE: `field*` cannot be used in conjunction with `measurement*`!
+
+#### Measurement & Tag Templates:
 
 The most basic template is to specify a single transformation to apply to all
-incoming metrics. _measurement_ is a special keyword that tells Telegraf which
-parts of the graphite bucket to combine into the measurement name. It can have a
-trailing `*` to indicate that the remainder of the metric should be used.
-Other words are considered tag keys. So the following template:
+incoming metrics. So the following template:
 
 ```toml
 templates = [
-    "region.measurement*"
+    "region.region.measurement*"
 ]
 ```
 
 would result in the following Graphite -> Telegraf transformation.
 
 ```
-us-west.cpu.load 100
-=> cpu.load,region=us-west value=100
+us.west.cpu.load 100
+=> cpu.load,region=us.west value=100
 ```
 
 #### Field Templates:
 
-There is also a _field_ keyword, which can only be specified once.
 The field keyword tells Telegraf to give the metric that field name.
 So the following template:
 
 ```toml
+separator = "_"
 templates = [
     "measurement.measurement.field.field.region"
 ]
@@ -237,24 +247,26 @@ templates = [
 would result in the following Graphite -> Telegraf transformation.
 
 ```
-cpu.usage.idle.percent.us-west 100
-=> cpu_usage,region=us-west idle_percent=100
+cpu.usage.idle.percent.eu-east 100
+=> cpu_usage,region=eu-east idle_percent=100
 ```
 
-The field key can also be derived from the second "half" of the input metric-name by specifying ```field*```:
+The field key can also be derived from all remaining elements of the graphite
+bucket by specifying `field*`:
+
 ```toml
+separator = "_"
 templates = [
     "measurement.measurement.region.field*"
 ]
 ```
 
-would result in the following Graphite -> Telegraf transformation.
+which would result in the following Graphite -> Telegraf transformation.
 
 ```
-cpu.usage.us-west.idle.percentage 100
-=> cpu_usage,region=us-west idle_percentage=100
+cpu.usage.eu-east.idle.percentage 100
+=> cpu_usage,region=eu-east idle_percentage=100
 ```
-(This cannot be used in conjunction with "measurement*"!)
 
 #### Filter Templates:
 
@@ -271,8 +283,8 @@ templates = [
 which would result in the following transformation:
 
 ```
-cpu.load.us-west 100
-=> cpu_load,region=us-west value=100
+cpu.load.eu-east 100
+=> cpu_load,region=eu-east value=100
 
 mem.cached.localhost 256
 => mem_cached,host=localhost value=256
@@ -294,8 +306,8 @@ templates = [
 would result in the following Graphite -> Telegraf transformation.
 
 ```
-cpu.usage.idle.us-west 100
-=> cpu_usage,region=us-west,datacenter=1a idle=100
+cpu.usage.idle.eu-east 100
+=> cpu_usage,region=eu-east,datacenter=1a idle=100
 ```
 
 There are many more options available,
@@ -326,12 +338,12 @@ There are many more options available,
   ## similar to the line protocol format. There can be only one default template.
   ## Templates support below format:
   ## 1. filter + template
-  ## 2. filter + template + extra tag
+  ## 2. filter + template + extra tag(s)
   ## 3. filter + template with field key
   ## 4. default template
   templates = [
     "*.app env.service.resource.measurement",
-    "stats.* .host.measurement* region=us-west,agent=sensu",
+    "stats.* .host.measurement* region=eu-east,agent=sensu",
     "stats2.* .host.measurement.field",
     "measurement*"
   ]
diff --git a/plugins/parsers/graphite/parser.go b/plugins/parsers/graphite/parser.go
index 8c31cd760c3d9..d371274df974e 100644
--- a/plugins/parsers/graphite/parser.go
+++ b/plugins/parsers/graphite/parser.go
@@ -133,7 +133,7 @@ func (p *GraphiteParser) Parse(buf []byte) ([]telegraf.Metric, error) {
 	}
 
 	if errStr != "" {
-		return metrics, fmt.Errorf(errStr)
+		return metrics, fmt.Errorf(strings.TrimSpace(errStr))
 	}
 	return metrics, nil
 }
@@ -267,13 +267,13 @@ func (t *template) Apply(line string) (string, map[string]string, string, error)
 	fields := strings.Split(line, ".")
 	var (
 		measurement []string
-		tags        = make(map[string]string)
+		tags        = make(map[string][]string)
 		field       []string
 	)
 
 	// Set any default tags
 	for k, v := range t.defaultTags {
-		tags[k] = v
+		tags[k] = append(tags[k], v)
 	}
 
 	// See if an invalid combination has been specified in the template:
@@ -285,30 +285,43 @@ func (t *template) Apply(line string) (string, map[string]string, string, error)
 		}
 	}
 	if t.greedyField && t.greedyMeasurement {
-		return "", nil, "", fmt.Errorf("either 'field*' or 'measurement*' can be used in each template (but not both together): %q", strings.Join(t.tags, t.separator))
+		return "", nil, "",
+			fmt.Errorf("either 'field*' or 'measurement*' can be used in each "+
+				"template (but not both together): %q",
+				strings.Join(t.tags, t.separator))
 	}
 
 	for i, tag := range t.tags {
 		if i >= len(fields) {
 			continue
 		}
+		if tag == "" {
+			continue
+		}
 
-		if tag == "measurement" {
+		switch tag {
+		case "measurement":
 			measurement = append(measurement, fields[i])
-		} else if tag == "field" {
+		case "field":
 			field = append(field, fields[i])
-		} else if tag == "field*" {
+		case "field*":
 			field = append(field, fields[i:]...)
 			break
-		} else if tag == "measurement*" {
+		case "measurement*":
 			measurement = append(measurement, fields[i:]...)
 			break
-		} else if tag != "" {
-			tags[tag] = fields[i]
+		default:
+			tags[tag] = append(tags[tag], fields[i])
 		}
 	}
 
-	return strings.Join(measurement, t.separator), tags, strings.Join(field, t.separator), nil
+	// Convert to map of strings.
+	outtags := make(map[string]string)
+	for k, values := range tags {
+		outtags[k] = strings.Join(values, t.separator)
+	}
+
+	return strings.Join(measurement, t.separator), outtags, strings.Join(field, t.separator), nil
 }
 
 // matcher determines which template should be applied to a given metric
diff --git a/plugins/parsers/graphite/parser_test.go b/plugins/parsers/graphite/parser_test.go
index 5200cfbdd443a..55f1a9e2b8129 100644
--- a/plugins/parsers/graphite/parser_test.go
+++ b/plugins/parsers/graphite/parser_test.go
@@ -61,6 +61,13 @@ func TestTemplateApply(t *testing.T) {
 			measurement: "cpu",
 			tags:        map[string]string{"hostname": "server01", "region": "us-west"},
 		},
+		{
+			test:        "metric with multiple tags",
+			input:       "server01.example.org.cpu.us-west",
+			template:    "hostname.hostname.hostname.measurement.region",
+			measurement: "cpu",
+			tags:        map[string]string{"hostname": "server01.example.org", "region": "us-west"},
+		},
 		{
 			test: "no metric",
 			tags: make(map[string]string),
@@ -142,7 +149,7 @@ func TestParseMissingMeasurement(t *testing.T) {
 	}
 }
 
-func TestParse(t *testing.T) {
+func TestParseLine(t *testing.T) {
 	testTime := time.Now().Round(time.Second)
 	epochTime := testTime.Unix()
 	strTime := strconv.FormatInt(epochTime, 10)
@@ -243,6 +250,107 @@ func TestParse(t *testing.T) {
 	}
 }
 
+// TestParse feeds whole graphite lines to the parser's Parse method and checks
+// the first returned metric (name, tag count, value, time) or the error text.
+func TestParse(t *testing.T) {
+	testTime := time.Now().Round(time.Second)
+	epochTime := testTime.Unix()
+	strTime := strconv.FormatInt(epochTime, 10)
+
+	var tests = []struct {
+		test        string
+		input       []byte
+		measurement string
+		tags        map[string]string
+		value       float64
+		time        time.Time
+		template    string
+		err         string
+	}{
+		{
+			test:        "normal case",
+			input:       []byte(`cpu.foo.bar 50 ` + strTime),
+			template:    "measurement.foo.bar",
+			measurement: "cpu",
+			tags: map[string]string{
+				"foo": "foo",
+				"bar": "bar",
+			},
+			value: 50,
+			time:  testTime,
+		},
+		{
+			test:        "metric only with float value",
+			input:       []byte(`cpu 50.554 ` + strTime),
+			measurement: "cpu",
+			template:    "measurement",
+			value:       50.554,
+			time:        testTime,
+		},
+		{
+			test:     "missing metric",
+			input:    []byte(`1419972457825`),
+			template: "measurement",
+			err:      `received "1419972457825" which doesn't have required fields`,
+		},
+		{
+			test:     "should error parsing invalid float",
+			input:    []byte(`cpu 50.554z 1419972457825`),
+			template: "measurement",
+			err:      `field "cpu" value: strconv.ParseFloat: parsing "50.554z": invalid syntax`,
+		},
+		{
+			test:     "should error parsing invalid int",
+			input:    []byte(`cpu 50z 1419972457825`),
+			template: "measurement",
+			err:      `field "cpu" value: strconv.ParseFloat: parsing "50z": invalid syntax`,
+		},
+		{
+			test:     "should error parsing invalid time",
+			input:    []byte(`cpu 50.554 14199724z57825`),
+			template: "measurement",
+			err:      `field "cpu" time: strconv.ParseFloat: parsing "14199724z57825": invalid syntax`,
+		},
+		{
+			test:     "measurement* and field* (invalid)",
+			input:    []byte(`prod.us-west.server01.cpu.util.idle.percent 99.99 1419972457825`),
+			template: "env.zone.host.measurement*.field*",
+			err:      `either 'field*' or 'measurement*' can be used in each template (but not both together): "env.zone.host.measurement*.field*"`,
+		},
+	}
+
+	for _, test := range tests {
+		p, err := NewGraphiteParser("", []string{test.template}, nil)
+		if err != nil {
+			t.Fatalf("unexpected error creating graphite parser: %v", err)
+		}
+
+		metrics, err := p.Parse(test.input)
+		if errstr(err) != test.err {
+			t.Fatalf("err does not match.  expected [%v], got [%v]", test.err, err)
+		}
+		if err != nil {
+			// If we erred out, it was intended and the following checks won't work
+			continue
+		}
+		if metrics[0].Name() != test.measurement {
+			t.Fatalf("name parse failer.  expected %v, got %v", test.measurement, metrics[0].Name())
+		}
+		if len(metrics[0].Tags()) != len(test.tags) {
+			t.Fatalf("tags len mismatch.  expected %d, got %d", len(test.tags), len(metrics[0].Tags()))
+		}
+		f, ok := metrics[0].Fields()["value"].(float64)
+		if !ok || f != test.value { // compare with the expectation, not with itself
+			t.Fatalf("floatValue value mismatch.  expected %v, got %v",
+				test.value, metrics[0].Fields()["value"])
+		}
+		if metrics[0].Time().UnixNano()/1000000 != test.time.UnixNano()/1000000 {
+			t.Fatalf("time value mismatch.  expected %v, got %v",
+				test.time.UnixNano(), metrics[0].Time().UnixNano())
+		}
+	}
+}
+
 func TestParseNaN(t *testing.T) {
 	p, err := NewGraphiteParser("", []string{"measurement*"}, nil)
 	assert.NoError(t, err)