Self-Improving Agents: Continuous Learning Loops (Agent)

Metadata

---
spec: self-improving-agents-learning-loops-v1
date: "2026-03-14"
domain: "agent_self_improvement"
implementations_tested: 4 # NOTE: only 3 are detailed under Implementation Comparison
assessment_type: "qualitative_production_testing"
key_finding: "self_improvement is structured note-taking with promotion, not autonomous learning"
---

Learning Loop Architecture

[learning_loop]
stages: 4
type: "prompt_layer_intervention" # not weight updating
paradigm: "structured_note_taking_with_promotion_pathway"

[[stage.detect]]
purpose: "recognize errors and improvement opportunities"
methods: ["hook_triggers", "manual_evaluation", "user_correction"]
coverage_command_failures: 0.95
coverage_user_corrections: 0.80
coverage_suboptimal_output: 0.35
coverage_confident_hallucinations: 0.05 # hardest unsolved problem
bottleneck: "silent_confident_wrong_outputs"

[[stage.capture]]
purpose: "structured logging of what went wrong and what should happen"
required_fields: ["id", "timestamp", "priority", "status", "area", "summary", "details", "suggested_action"]
id_format: "TYPE-YYYYMMDD-XXX"
types: ["LRN", "ERR", "FEAT"]
priority_levels: ["low", "medium", "high", "critical"]
status_values: ["pending", "in_progress", "resolved", "wont_fix", "promoted"]

[[stage.store]]
purpose: "persist learnings for cross-session retrieval"

[storage_tiers]
[[tier.raw_logs]]
files: [".learnings/LEARNINGS.md", ".learnings/ERRORS.md", ".learnings/FEATURE_REQUESTS.md"]
lifespan: "until_resolved_or_promoted"
durability: 0.4 # session-scoped noise risk
schema: "strict_with_metadata"

[[tier.project_rules]]
files: ["AGENTS.md", "TOOLS.md", "CLAUDE.md", ".cursorrules"]
lifespan: "project_scoped"
durability: 0.8
schema: "curated_concise_rules"

[[tier.behavioral_core]]
files: ["SOUL.md"]
lifespan: "permanent"
durability: 0.95
schema: "principles_and_personality"

[[stage.apply]]
purpose: "load relevant learnings before executing similar tasks"
mechanism: "workspace_file_injection"
context_competition: true # learnings compete with task context
pruning_required: true

Promotion Pathway

[promotion_rules]
trigger_threshold: { recurrence_count: 3, min_tasks: 2, window_days: 30 }
promotion_targets:
  "behavioral_pattern"     → "SOUL.md"
  "workflow_improvement"   → "AGENTS.md"
  "tool_gotcha"            → "TOOLS.md"
  "project_convention"     → "CLAUDE.md"

[promotion_criteria]
when:
  - "learning_applies_across_multiple_files_or_features"
  - "knowledge_any_contributor_should_know"
  - "prevents_recurring_mistakes"
  - "documents_project_specific_conventions"

[promotion_process]
steps:
  1. "distill_learning_into_concise_rule"
  2. "add_to_appropriate_target_file"
  3. "update_original_status_to_promoted"
  4. "add_Promoted_field_with_target_path"

Error Detection Triggers

[detection_triggers]
[[trigger.command_failure]]
pattern: "non_zero_exit_code | timeout | exception"
target: ".learnings/ERRORS.md"
auto_detectable: true
hook_type: "PostToolUse:Bash"

[[trigger.user_correction]]
patterns: ["no that's not right", "actually", "you're wrong", "that's outdated"]
target: ".learnings/LEARNINGS.md"
category: "correction"
auto_detectable: true # via prompt matching

[[trigger.missing_capability]]
patterns: ["can you also", "i wish you could", "why can't you"]
target: ".learnings/FEATURE_REQUESTS.md"
auto_detectable: true

[[trigger.knowledge_gap]]
pattern: "user_provides_unknown_information"
target: ".learnings/LEARNINGS.md"
category: "knowledge_gap"
auto_detectable: false # requires agent awareness

[[trigger.confident_hallucination]]
pattern: "agent_wrong_but_confident"
target: "NONE" # cannot self-detect
auto_detectable: false
severity: "critical_unsolved"

Implementation Comparison

[implementations]

[[implementation.openclaw_self_improvement_skill]]
structured_logging: true
schema: "strict_id_timestamp_priority_status_area"
promotion_pathway: true
error_hooks: true # PostToolUse activation
recurrence_tracking: true # count + dates + pattern-keys
cross_session: true
multi_agent_support: true # Claude Code, Codex, Copilot, OpenClaw
skill_extraction: true # learnings → reusable skills
effectiveness: "highest_structured"

[[implementation.claude_md_cursor_rules]]
structured_logging: false # freeform text
promotion_pathway: false # manual only
error_hooks: false
recurrence_tracking: false
cross_session: true
human_dependency: "high" # human detects + writes rules
effectiveness: "good_for_conventions_poor_for_errors"

[[implementation.session_memory_daily_logs]]
structured_logging: "semi_structured"
promotion_pathway: true # heartbeat distillation to MEMORY.md
error_hooks: false
recurrence_tracking: false
cross_session: true
effectiveness: "depends_on_logging_quality"

Learning Type Effectiveness

[learning_types]

[[type.convention_capture]]
example: "use pnpm not npm, vitest not jest"
effectiveness: 0.90
transfers_well: true
binary: true # right or wrong, no ambiguity
roi: "highest"

[[type.error_reproduction]]
example: "exact command + input + error message logged"
effectiveness: 0.85
transfers_well: true
requires: "specific_reproduction_steps"

[[type.behavioral_correction]]
example: "be concise, ask before external actions"
effectiveness: 0.55
transfers_well: "if_concrete_and_testable"
limitation: "abstract_corrections_produce_no_improvement"

[[type.architectural_learning]]
example: "database schema should have been normalized differently"
effectiveness: 0.20
transfers_well: false
limitation: "requires_full_system_context"

Core Limitations

[limitations]

[[limit.no_weight_update]]
description: "prompt_layer_intervention_not_training_time_improvement"
base_model_unchanged: true
fixable: false # architectural constraint

[[limit.context_competition]]
description: "learnings compete with task context for window space"
mitigation: "aggressive_pruning_and_promotion"
fixable: true # but requires discipline

[[limit.session_amnesia]]
description: "implicit_knowledge_lost_at_session_boundary"
only_survives: "explicitly_written_content"
fixable: false # fundamental to stateless models

[[limit.detection_bottleneck]]
description: "agent_only_learns_from_recognized_errors"
hardest_problem: "confident_hallucination_detection"
fixable: false # currently unsolved

[[limit.arbitrary_thresholds]]
description: "promotion_threshold_3x_in_30d_is_heuristic_not_principled"
some_patterns_should_promote_at: 1 # recurrence_count, i.e. on first occurrence
some_patterns_recur_without_generalizing: true
fixable: true # better heuristics possible

Recommendations

[implementation_guidance]

[constraint = structured_learning_needed]
  → "use openclaw self-improvement skill"
  → "setup .learnings/ with LEARNINGS.md, ERRORS.md, FEATURE_REQUESTS.md"
  → "enable PostToolUse hooks for auto error detection"

[constraint = simple_project_conventions]
  → "CLAUDE.md or .cursorrules sufficient"
  → "human-maintained, works well for binary rules"

[constraint = cross_agent_compatibility]
  → "self-improvement skill supports Claude Code, Codex, Copilot, OpenClaw"
  → "use .github/copilot-instructions.md for Copilot-specific rules"

[constraint = long_running_agent_system]
  → "combine daily logs + heartbeat distillation + promotion pathway"
  → "review .learnings/ before major tasks"
  → "prune stale entries monthly"

[warning]
  → "do_not_expect_autonomous_improvement"
  → "this_is_structured_note_taking_not_agi"
  → "detection_gap_for_confident_errors_is_unsolved"

References

openclaw_self_improvement_skill: "~/.openclaw/workspace/skills/self-improving-agent/SKILL.md"
learnings_schema: "ID: TYPE-YYYYMMDD-XXX, Priority: low|medium|high|critical"
promotion_threshold: "recurrence_count >= 3 across 2+ tasks in 30 days"
workspace_files: ["AGENTS.md", "SOUL.md", "TOOLS.md", "MEMORY.md"]
byterover: "structured_memory_for_cross_session_agent_knowledge"

Other modes

◉ Standard — Full narrative ◆ Experimental — Visual comparison