Skip to content

Commit c871c87

Browse files
Merge pull request #140 from amd/development
dev -> main
2 parents 4d0e724 + cfd3f0e commit c871c87

File tree

70 files changed

+4308
-271
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+4308
-271
lines changed

README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ system debug.
1313
- ['describe' subcommand](#describe-subcommand)
1414
- ['run-plugins' sub command](#run-plugins-sub-command)
1515
- ['gen-plugin-config' sub command](#gen-plugin-config-sub-command)
16+
- ['compare-runs' subcommand](#compare-runs-subcommand)
1617
- ['summary' sub command](#summary-sub-command)
1718
- [Configs](#configs)
1819
- [Global args](#global-args)
@@ -292,6 +293,32 @@ Save this to `dmesg_custom_config.json` and run:
292293
node-scraper --plugin-configs dmesg_custom_config.json run-plugins DmesgPlugin
293294
```
294295
296+
#### **'compare-runs' subcommand**
297+
The `compare-runs` subcommand compares datamodels from two run log directories (e.g. two
298+
`scraper_logs_*` folders). By default, all plugins with data in both runs are compared.
299+
300+
**Basic usage:**
301+
```sh
302+
node-scraper compare-runs <path1> <path2>
303+
```
304+
305+
**Exclude specific plugins from the comparison** with `--skip-plugins`:
306+
```sh
307+
node-scraper compare-runs path1 path2 --skip-plugins SomePlugin
308+
```
309+
310+
**Compare only certain plugins** with `--include-plugins`:
311+
```sh
312+
node-scraper compare-runs path1 path2 --include-plugins DmesgPlugin
313+
```
314+
315+
**Show full diff output** (no truncation of the Message column or limit on number of errors) with `--dont-truncate`:
316+
```sh
317+
node-scraper compare-runs path1 path2 --include-plugins DmesgPlugin --dont-truncate
318+
```
319+
320+
You can pass multiple plugin names to `--skip-plugins` or `--include-plugins`.
321+
295322
#### **'summary' sub command**
296323
The 'summary' subcommand can be used to combine results from multiple runs of node-scraper to a
297324
single summary.csv file. Sample run:

docs/PLUGIN_DOC.md

Lines changed: 131 additions & 10 deletions
Large diffs are not rendered by default.

nodescraper/base/inbanddataplugin.py

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,152 @@
2323
# SOFTWARE.
2424
#
2525
###############################################################################
26-
from typing import Generic
26+
import json
27+
import os
28+
from pathlib import Path
29+
from typing import Any, Generic, Optional
2730

2831
from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams
2932
from nodescraper.generictypes import TAnalyzeArg, TCollectArg, TDataModel
3033
from nodescraper.interfaces import DataPlugin
34+
from nodescraper.models import DataModel
35+
from nodescraper.utils import pascal_to_snake
3136

3237

3338
class InBandDataPlugin(
3439
DataPlugin[InBandConnectionManager, SSHConnectionParams, TDataModel, TCollectArg, TAnalyzeArg],
3540
Generic[TDataModel, TCollectArg, TAnalyzeArg],
3641
):
37-
"""Base class for in band plugins"""
42+
"""Base class for in band plugins."""
3843

3944
CONNECTION_TYPE = InBandConnectionManager
45+
46+
@classmethod
def find_datamodel_path_in_run(cls, run_path: str) -> Optional[str]:
    """Locate this plugin's collector datamodel file inside a run directory.

    Args:
        run_path: Path to a scraper log run directory (e.g. scraper_logs_*).

    Returns:
        Absolute path to the datamodel file, or None if not found.
    """
    abs_run = os.path.abspath(run_path)
    if not os.path.isdir(abs_run):
        return None

    collector_cls = getattr(cls, "COLLECTOR", None)
    data_model_cls = getattr(cls, "DATA_MODEL", None)
    if not (collector_cls and data_model_cls):
        return None

    # Collector output lives at <run>/<plugin_snake>/<collector_snake>/
    collector_dir = os.path.join(
        abs_run,
        pascal_to_snake(cls.__name__),
        pascal_to_snake(collector_cls.__name__),
    )
    if not os.path.isdir(collector_dir):
        return None

    result_path = os.path.join(collector_dir, "result.json")
    if not os.path.isfile(result_path):
        return None

    # result.json must name this plugin as its parent; otherwise the directory
    # belongs to a different plugin and is ignored.
    try:
        payload = json.loads(Path(result_path).read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return None
    if payload.get("parent") != cls.__name__:
        return None

    target_json = f"{data_model_cls.__name__.lower()}.json"
    for entry in os.listdir(collector_dir):
        lowered = entry.lower()
        # First acceptable file wins, in directory-listing order: either a
        # datamodel JSON dump or a raw .log the datamodel may be able to import.
        if (
            lowered.endswith("datamodel.json")
            or lowered == target_json
            or lowered.endswith(".log")
        ):
            return os.path.join(collector_dir, entry)
    return None
87+
88+
@classmethod
def load_datamodel_from_path(cls, dm_path: str) -> Optional[TDataModel]:
    """Load this plugin's DATA_MODEL from a file path (JSON or .log).

    Args:
        dm_path: Path to a datamodel JSON file, or to a .log file (the latter
            only works if DATA_MODEL overrides import_model for that format).

    Returns:
        Instance of DATA_MODEL, or None if the file is missing or cannot be
        parsed/validated.
    """
    dm_path = os.path.abspath(dm_path)
    if not os.path.isfile(dm_path):
        return None
    data_model_cls = getattr(cls, "DATA_MODEL", None)
    if not data_model_cls:
        return None
    try:
        if dm_path.lower().endswith(".log"):
            # Raw logs can only be loaded when the datamodel class provides
            # its own import_model override; the base DataModel implementation
            # does not understand arbitrary log formats, so detect and reject
            # the inherited base method.
            import_model = getattr(data_model_cls, "import_model", None)
            if not callable(import_model):
                return None
            base_import = getattr(DataModel.import_model, "__func__", DataModel.import_model)
            if getattr(import_model, "__func__", import_model) is base_import:
                return None
            return import_model(dm_path)
        with open(dm_path, encoding="utf-8") as f:
            data = json.load(f)
        return data_model_cls.model_validate(data)
    except Exception:
        # Best-effort loader: any parse/IO/validation failure maps to None.
        # The original tuple (json.JSONDecodeError, OSError, Exception) was
        # redundant (flake8-bugbear B014) -- Exception subsumes the others.
        return None
119+
120+
@classmethod
def get_extracted_errors(cls, data_model: DataModel) -> Optional[list[str]]:
    """Compute extracted errors from datamodel for compare-runs (in memory only).

    Args:
        data_model: Loaded DATA_MODEL instance.

    Returns:
        Sorted list of error match strings, or None if not applicable.
    """
    # The datamodel must expose comparable text content.
    content_getter = getattr(data_model, "get_compare_content", None)
    if not callable(content_getter):
        return None
    try:
        raw_content = content_getter()
    except Exception:
        return None
    if not isinstance(raw_content, str):
        return None

    # The plugin must have an analyzer capable of extracting error matches.
    analyzer = getattr(cls, "ANALYZER", None)
    if not analyzer:
        return None
    matcher = getattr(analyzer, "get_error_matches", None)
    if not callable(matcher):
        return None
    try:
        found = matcher(raw_content)
        return sorted(found) if found is not None else None
    except Exception:
        return None
150+
151+
@classmethod
def load_run_data(cls, run_path: str) -> Optional[dict[str, Any]]:
    """Load this plugin's run data from a scraper run directory for comparison.

    Args:
        run_path: Path to a scraper log run directory or to a datamodel file.

    Returns:
        Dict suitable for diffing with another run, or None if not found.
    """
    resolved = os.path.abspath(run_path)
    if not os.path.exists(resolved):
        return None

    # A file path is treated as the datamodel itself; a directory is searched
    # for this plugin's collector output.
    if os.path.isfile(resolved):
        dm_path = resolved
    else:
        dm_path = cls.find_datamodel_path_in_run(resolved)
    if not dm_path:
        return None

    model = cls.load_datamodel_from_path(dm_path)
    if model is None:
        return None

    dumped = model.model_dump(mode="json")
    errors = cls.get_extracted_errors(model)
    if errors is not None:
        dumped["extracted_errors"] = errors
    return dumped

nodescraper/base/regexanalyzer.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,29 @@ def count(self, val: int):
5555
class RegexAnalyzer(DataAnalyzer[TDataModel, TAnalyzeArg]):
5656
"""Parent class for all regex based data analyzers."""
5757

58-
# Class variable for timestamp pattern - can be overridden in subclasses
5958
TIMESTAMP_PATTERN: re.Pattern = re.compile(r"(\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)")
59+
ERROR_REGEX: list[ErrorRegex] = []
60+
61+
@classmethod
def get_error_matches(cls, content: str) -> set[str]:
    """Extract all error match strings from content using the analyzer's ERROR_REGEX.

    Args:
        content: Raw log text.

    Returns:
        Set of normalized error match strings.
    """
    matches: set[str] = set()
    for error_regex_obj in getattr(cls, "ERROR_REGEX", []):
        for match in error_regex_obj.regex.findall(content):
            if isinstance(match, str):
                # Plain string match: whitespace-trimmed. (The original code
                # special-cased strings containing "\n", but both branches
                # reduced to match.strip(); the redundant branch is removed.)
                normalized = match.strip()
            elif isinstance(match, (tuple, list)):
                # findall yields tuples when a pattern has multiple groups;
                # join the non-empty groups into one multi-line string.
                normalized = "\n".join(m for m in match if m)
            else:
                normalized = str(match).strip() if match else ""
            if normalized:
                matches.add(normalized)
    return matches
6081

6182
def _extract_timestamp_from_match_position(
6283
self, content: str, match_start: int

nodescraper/cli/cli.py

Lines changed: 53 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from typing import Optional
3434

3535
import nodescraper
36+
from nodescraper.cli.compare_runs import run_compare_runs
3637
from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP
3738
from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
3839
from nodescraper.cli.helper import (
@@ -45,6 +46,7 @@
4546
log_system_info,
4647
parse_describe,
4748
parse_gen_plugin_config,
49+
process_args,
4850
)
4951
from nodescraper.cli.inputargtypes import ModelArgHandler, json_arg, log_path_arg
5052
from nodescraper.configregistry import ConfigRegistry
@@ -223,6 +225,40 @@ def build_parser(
223225
help="Generate reference config from previous run logfiles. Writes to --output-path/reference_config.json if provided, otherwise ./reference_config.json.",
224226
)
225227

228+
compare_runs_parser = subparsers.add_parser(
229+
"compare-runs",
230+
help="Compare datamodels from two run log directories",
231+
)
232+
compare_runs_parser.add_argument(
233+
"path1",
234+
type=str,
235+
help="Path to first run log directory",
236+
)
237+
compare_runs_parser.add_argument(
238+
"path2",
239+
type=str,
240+
help="Path to second run log directory",
241+
)
242+
compare_runs_parser.add_argument(
243+
"--skip-plugins",
244+
nargs="*",
245+
choices=list(plugin_reg.plugins.keys()),
246+
metavar="PLUGIN",
247+
help="Plugin names to exclude from comparison",
248+
)
249+
compare_runs_parser.add_argument(
250+
"--include-plugins",
251+
nargs="*",
252+
choices=list(plugin_reg.plugins.keys()),
253+
metavar="PLUGIN",
254+
help="If set, only compare data for these plugins (default: compare all found)",
255+
)
256+
compare_runs_parser.add_argument(
257+
"--dont-truncate",
258+
action="store_true",
259+
dest="dont_truncate",
260+
help="Do not truncate the Message column; show full error text and all errors (not just first 3)",
261+
)
226262
config_builder_parser.add_argument(
227263
"--plugins",
228264
nargs="*",
@@ -306,60 +342,6 @@ def setup_logger(log_level: str = "INFO", log_path: Optional[str] = None) -> log
306342
return logger
307343

308344

309-
def process_args(
310-
raw_arg_input: list[str], plugin_names: list[str]
311-
) -> tuple[list[str], dict[str, list[str]]]:
312-
"""separate top level args from plugin args
313-
314-
Args:
315-
raw_arg_input (list[str]): list of all arg input
316-
plugin_names (list[str]): list of plugin names
317-
318-
Returns:
319-
tuple[list[str], dict[str, list[str]]]: tuple of top level args
320-
and dict of plugin name to plugin args
321-
"""
322-
top_level_args = raw_arg_input
323-
324-
try:
325-
plugin_arg_index = raw_arg_input.index("run-plugins")
326-
except ValueError:
327-
plugin_arg_index = -1
328-
329-
plugin_arg_map = {}
330-
invalid_plugins = []
331-
if plugin_arg_index != -1 and plugin_arg_index != len(raw_arg_input) - 1:
332-
top_level_args = raw_arg_input[: plugin_arg_index + 1]
333-
plugin_args = raw_arg_input[plugin_arg_index + 1 :]
334-
335-
# handle help case
336-
if plugin_args == ["-h"]:
337-
top_level_args += plugin_args
338-
else:
339-
cur_plugin = None
340-
for arg in plugin_args:
341-
# Handle comma-separated plugin names (but not arguments)
342-
if not arg.startswith("-") and "," in arg:
343-
# Split comma-separated plugin names
344-
for potential_plugin in arg.split(","):
345-
potential_plugin = potential_plugin.strip()
346-
if potential_plugin in plugin_names:
347-
plugin_arg_map[potential_plugin] = []
348-
cur_plugin = potential_plugin
349-
elif potential_plugin:
350-
# Track invalid plugin names to log event later
351-
invalid_plugins.append(potential_plugin)
352-
elif arg in plugin_names:
353-
plugin_arg_map[arg] = []
354-
cur_plugin = arg
355-
elif cur_plugin:
356-
plugin_arg_map[cur_plugin].append(arg)
357-
elif not arg.startswith("-"):
358-
# Track invalid plugin names to log event later
359-
invalid_plugins.append(arg)
360-
return (top_level_args, plugin_arg_map, invalid_plugins)
361-
362-
363345
def main(arg_input: Optional[list[str]] = None):
364346
"""Main entry point for the CLI
365347
@@ -384,7 +366,11 @@ def main(arg_input: Optional[list[str]] = None):
384366
sname = system_info.name.lower().replace("-", "_").replace(".", "_")
385367
timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")
386368

387-
if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]:
369+
if parsed_args.log_path and parsed_args.subcmd not in [
370+
"gen-plugin-config",
371+
"describe",
372+
"compare-runs",
373+
]:
388374
log_path = os.path.join(
389375
parsed_args.log_path,
390376
f"scraper_logs_{sname}_{timestamp}",
@@ -411,6 +397,18 @@ def main(arg_input: Optional[list[str]] = None):
411397
if parsed_args.subcmd == "describe":
412398
parse_describe(parsed_args, plugin_reg, config_reg, logger)
413399

400+
if parsed_args.subcmd == "compare-runs":
401+
run_compare_runs(
402+
parsed_args.path1,
403+
parsed_args.path2,
404+
plugin_reg,
405+
logger,
406+
skip_plugins=getattr(parsed_args, "skip_plugins", None) or [],
407+
include_plugins=getattr(parsed_args, "include_plugins", None),
408+
truncate_message=not getattr(parsed_args, "dont_truncate", False),
409+
)
410+
sys.exit(0)
411+
414412
if parsed_args.subcmd == "gen-plugin-config":
415413

416414
if parsed_args.reference_config_from_logs:

0 commit comments

Comments
 (0)