Compare commits
1 Commits
20875fb03f
...
nrec-forge
| Author | SHA1 | Date | |
|---|---|---|---|
|
6a3e78a479
|
14
.github/workflows/flake-check.yaml
vendored
Normal file
14
.github/workflows/flake-check.yaml
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
name: Run nix flake check
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
flake-check:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container:
|
||||||
|
image: ghcr.io/catthehacker/ubuntu:runner-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: cachix/install-nix-action@v27
|
||||||
|
- run: nix flake check
|
||||||
27
.github/workflows/flake-update.yaml
vendored
Normal file
27
.github/workflows/flake-update.yaml
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
---
|
||||||
|
name: Periodic flake update
|
||||||
|
on: # yamllint disable-line rule:truthy
|
||||||
|
schedule:
|
||||||
|
- cron: "0 0 * * *"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
flake-update:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container:
|
||||||
|
image: ghcr.io/catthehacker/ubuntu:runner-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
ref: master
|
||||||
|
- uses: cachix/install-nix-action@v27
|
||||||
|
- name: configure git
|
||||||
|
run: |
|
||||||
|
git config --global user.name 'torjus-bot'
|
||||||
|
git config --global user.email 'torjus-bot@git.t-juice.club'
|
||||||
|
- name: flake update
|
||||||
|
run: nix flake update --commit-lock-file
|
||||||
|
- name: push
|
||||||
|
run: git push
|
||||||
@@ -2,21 +2,21 @@
|
|||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"nixpkgs-options": {
|
"nixpkgs-options": {
|
||||||
"command": "nix",
|
"command": "nix",
|
||||||
"args": ["run", "git+https://code.t-juice.club/torjus/labmcp#nixpkgs-search", "--", "options", "serve"],
|
"args": ["run", "git+https://git.t-juice.club/torjus/labmcp#nixpkgs-search", "--", "options", "serve"],
|
||||||
"env": {
|
"env": {
|
||||||
"NIXPKGS_SEARCH_DATABASE": "sqlite:///run/user/1000/labmcp/nixpkgs-search.db"
|
"NIXPKGS_SEARCH_DATABASE": "sqlite:///run/user/1000/labmcp/nixpkgs-search.db"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nixpkgs-packages": {
|
"nixpkgs-packages": {
|
||||||
"command": "nix",
|
"command": "nix",
|
||||||
"args": ["run", "git+https://code.t-juice.club/torjus/labmcp#nixpkgs-search", "--", "packages", "serve"],
|
"args": ["run", "git+https://git.t-juice.club/torjus/labmcp#nixpkgs-search", "--", "packages", "serve"],
|
||||||
"env": {
|
"env": {
|
||||||
"NIXPKGS_SEARCH_DATABASE": "sqlite:///run/user/1000/labmcp/nixpkgs-search.db"
|
"NIXPKGS_SEARCH_DATABASE": "sqlite:///run/user/1000/labmcp/nixpkgs-search.db"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"lab-monitoring": {
|
"lab-monitoring": {
|
||||||
"command": "nix",
|
"command": "nix",
|
||||||
"args": ["run", "git+https://code.t-juice.club/torjus/labmcp#lab-monitoring", "--", "serve", "--enable-silences"],
|
"args": ["run", "git+https://git.t-juice.club/torjus/labmcp#lab-monitoring", "--", "serve", "--enable-silences"],
|
||||||
"env": {
|
"env": {
|
||||||
"PROMETHEUS_URL": "https://prometheus.home.2rjus.net",
|
"PROMETHEUS_URL": "https://prometheus.home.2rjus.net",
|
||||||
"ALERTMANAGER_URL": "https://alertmanager.home.2rjus.net",
|
"ALERTMANAGER_URL": "https://alertmanager.home.2rjus.net",
|
||||||
@@ -29,7 +29,7 @@
|
|||||||
"command": "nix",
|
"command": "nix",
|
||||||
"args": [
|
"args": [
|
||||||
"run",
|
"run",
|
||||||
"git+https://code.t-juice.club/torjus/homelab-deploy",
|
"git+https://git.t-juice.club/torjus/homelab-deploy",
|
||||||
"--",
|
"--",
|
||||||
"mcp",
|
"mcp",
|
||||||
"--nats-url", "nats://nats1.home.2rjus.net:4222",
|
"--nats-url", "nats://nats1.home.2rjus.net:4222",
|
||||||
@@ -39,7 +39,7 @@
|
|||||||
},
|
},
|
||||||
"git-explorer": {
|
"git-explorer": {
|
||||||
"command": "nix",
|
"command": "nix",
|
||||||
"args": ["run", "git+https://code.t-juice.club/torjus/labmcp#git-explorer", "--", "serve"],
|
"args": ["run", "git+https://git.t-juice.club/torjus/labmcp#git-explorer", "--", "serve"],
|
||||||
"env": {
|
"env": {
|
||||||
"GIT_REPO_PATH": "/home/torjus/git/nixos-servers"
|
"GIT_REPO_PATH": "/home/torjus/git/nixos-servers"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,155 +0,0 @@
|
|||||||
# Bare Metal Forgejo Actions Runner on nix-cache02
|
|
||||||
|
|
||||||
## Goal
|
|
||||||
|
|
||||||
Add a second Forgejo Actions runner instance on nix-cache02 that executes jobs directly on the host (bare metal). This allows CI builds to populate the nix binary cache automatically, reducing reliance on manually triggered builds before deployments.
|
|
||||||
|
|
||||||
## Motivation
|
|
||||||
|
|
||||||
Currently the workflow for updating a flake input (e.g. nixos-exporter) is:
|
|
||||||
|
|
||||||
1. Update flake lock
|
|
||||||
2. Push to master
|
|
||||||
3. Manually trigger a build on nix-cache02 (or wait for the scheduled builder)
|
|
||||||
4. Deploy to hosts
|
|
||||||
|
|
||||||
With a bare metal runner, repos like nixos-exporter can have CI workflows that run `nix build`, and those derivations automatically end up in the cache (served by harmonia). By the time hosts auto-upgrade, everything is already cached.
|
|
||||||
|
|
||||||
## Design
|
|
||||||
|
|
||||||
### Two Runner Instances
|
|
||||||
|
|
||||||
- **actions1** (existing) — Container-based, available to all Forgejo repos. Unchanged.
|
|
||||||
- **actions2** (new) — Host-based, restricted to trusted repos only via Forgejo runner scoping.
|
|
||||||
|
|
||||||
### Trusted Repos
|
|
||||||
|
|
||||||
Repos that should be allowed to use the bare metal runner:
|
|
||||||
|
|
||||||
- `torjus/nixos-servers`
|
|
||||||
- `torjus/nixos-exporter`
|
|
||||||
- `torjus/nixos` (gunter/magicman configs)
|
|
||||||
- Other repos with nix builds that benefit from cache population (add as needed)
|
|
||||||
|
|
||||||
Restriction is configured in the Forgejo web UI when registering the runner — scope it to specific repos or the org.
|
|
||||||
|
|
||||||
### Label Configuration
|
|
||||||
|
|
||||||
The new instance would use a host label:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
labels = [ "native:host" ];
|
|
||||||
```
|
|
||||||
|
|
||||||
Workflow files in trusted repos would target this with `runs-on: native`.
|
|
||||||
|
|
||||||
### Host Packages
|
|
||||||
|
|
||||||
The runner needs nix and basic tools available:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
hostPackages = with pkgs; [
|
|
||||||
bash
|
|
||||||
coreutils
|
|
||||||
curl
|
|
||||||
gawk
|
|
||||||
gitMinimal
|
|
||||||
gnused
|
|
||||||
nodejs
|
|
||||||
wget
|
|
||||||
nix
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
## Security Analysis
|
|
||||||
|
|
||||||
### What the runner CAN access
|
|
||||||
|
|
||||||
- **Nix store** — Can read and write derivations. This is the whole point; harmonia serves the store to all hosts.
|
|
||||||
- **Network** — Full network access during job execution.
|
|
||||||
- **World-readable files** — Standard for any process on the system.
|
|
||||||
|
|
||||||
### What the runner CANNOT access
|
|
||||||
|
|
||||||
- **Cache signing key** — `/run/secrets/cache-secret` is mode `0400` root-owned. Harmonia signs derivations on serve, not on store write.
|
|
||||||
- **Vault AppRole credentials** — `/var/lib/vault/approle/` is root-owned.
|
|
||||||
- **Other vault secrets** — All in `/run/secrets/` with restrictive permissions.
|
|
||||||
|
|
||||||
### Mitigations
|
|
||||||
|
|
||||||
- **Trusted repos only** — Forgejo runner scoping restricts which repos can submit jobs. Only repos we control should have access.
|
|
||||||
- **DynamicUser** — The runner uses systemd DynamicUser, so no persistent user account. Each invocation gets an ephemeral UID.
|
|
||||||
- **Separate instance** — Container-based jobs (untrusted repos) remain on actions1 and never get host access.
|
|
||||||
|
|
||||||
### Accepted Risks
|
|
||||||
|
|
||||||
- A compromised trusted repo could inject bad derivations into the nix store/cache. This is an accepted risk since those repos already have deploy access to production hosts.
|
|
||||||
- Jobs can consume host resources (CPU, memory, disk). The `runner.capacity` setting limits concurrent jobs.
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
### 1. NixOS Configuration
|
|
||||||
|
|
||||||
**File:** `hosts/nix-cache02/actions-runner.nix`
|
|
||||||
|
|
||||||
Add a second instance alongside the existing overrides:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{ pkgs, ... }:
|
|
||||||
{
|
|
||||||
# ... existing actions1 overrides ...
|
|
||||||
|
|
||||||
services.gitea-actions-runner.instances.actions2 = {
|
|
||||||
enable = true;
|
|
||||||
name = "nix-cache02-native";
|
|
||||||
url = "https://code.t-juice.club";
|
|
||||||
tokenFile = "/run/secrets/forgejo-runner-token-native";
|
|
||||||
labels = [ "native:host" ];
|
|
||||||
hostPackages = with pkgs; [
|
|
||||||
bash coreutils curl gawk gitMinimal gnused nodejs wget nix
|
|
||||||
];
|
|
||||||
settings = {
|
|
||||||
runner.capacity = 4;
|
|
||||||
cache = {
|
|
||||||
enabled = true;
|
|
||||||
dir = "/var/lib/gitea-runner/actions2/cache";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Vault Secret
|
|
||||||
|
|
||||||
The native runner needs its own registration token (separate from actions1):
|
|
||||||
|
|
||||||
- Add `hosts/nix-cache02/forgejo-runner-token-native` to `terraform/vault/secrets.tf`
|
|
||||||
- Add `forgejo_runner_token_native` variable to `terraform/vault/variables.tf`
|
|
||||||
- Add vault secret config in `actions-runner.nix` pointing to the new path
|
|
||||||
|
|
||||||
### 3. Forgejo Setup
|
|
||||||
|
|
||||||
1. Generate a new runner token in Forgejo, scoped to trusted repos only
|
|
||||||
2. Store in Vault: `bao kv put secret/hosts/nix-cache02/forgejo-runner-token-native token=<token>`
|
|
||||||
3. Set the tfvar and run `tofu apply` in `terraform/vault/`
|
|
||||||
|
|
||||||
### 4. Example Workflow
|
|
||||||
|
|
||||||
In a trusted repo (e.g. nixos-exporter):
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
name: Build
|
|
||||||
on: [push]
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
runs-on: native
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- run: nix build
|
|
||||||
```
|
|
||||||
|
|
||||||
## Open Questions
|
|
||||||
|
|
||||||
- Should `hostPackages` include additional tools (e.g. `cachix`, `nix-prefetch-*`)?
|
|
||||||
- Should we set resource limits on the runner (systemd MemoryMax, CPUQuota)?
|
|
||||||
- Do we want a separate capacity for the native runner vs container runner, or is 4 fine for both?
|
|
||||||
@@ -169,14 +169,6 @@ These appear on both units and can be ignored:
|
|||||||
- Journal corruption confirmed on next boot
|
- Journal corruption confirmed on next boot
|
||||||
- No pstore data captured
|
- No pstore data captured
|
||||||
|
|
||||||
### 2026-03-12: pn02 Memtest86 — 38 Passes, Zero Errors
|
|
||||||
|
|
||||||
- Ran memtest86 for ~109 hours (4.5 days), completing 38 full passes
|
|
||||||
- **Zero errors found** — RAM appears healthy
|
|
||||||
- Makes hardware-induced memory corruption less likely as the sole cause of crashes
|
|
||||||
- Memtest cannot rule out CPU cache errors, PCIe/IOMMU issues, or kernel bugs triggered by platform quirks
|
|
||||||
- **Next step**: Boot back into NixOS with sched_ext disabled to test the kernel scheduler hypothesis
|
|
||||||
|
|
||||||
### 2026-03-07: pn01 Status
|
### 2026-03-07: pn01 Status
|
||||||
|
|
||||||
- pn01 has had **zero crashes** since initial setup on Feb 21
|
- pn01 has had **zero crashes** since initial setup on Feb 21
|
||||||
@@ -201,18 +193,18 @@ These appear on both units and can be ignored:
|
|||||||
|
|
||||||
**pn02 is unreliable.** After exhausting mitigations (fTPM disabled, BIOS updated, WiFi/BT disabled, ErP disabled, amdgpu blacklisted, processor.max_cstate=1, NMI watchdog, rasdaemon), the unit still crashes every few days. 26 reboots in 30 days (7 unclean crashes + daily auto-upgrade reboots).
|
**pn02 is unreliable.** After exhausting mitigations (fTPM disabled, BIOS updated, WiFi/BT disabled, ErP disabled, amdgpu blacklisted, processor.max_cstate=1, NMI watchdog, rasdaemon), the unit still crashes every few days. 26 reboots in 30 days (7 unclean crashes + daily auto-upgrade reboots).
|
||||||
|
|
||||||
The pstore crash dumps from March reveal a new dimension: at least some crashes are **kernel scheduler bugs in sched_ext**, not just silent hardware-level freezes. The `set_next_task_scx` and `pick_next_task_fair` crash sites, combined with the dbus-daemon segfault before one crash, suggest possible memory corruption that manifests in the scheduler. Memtest86 ran 38 passes (109 hours) with zero errors, making option 2 less likely. Remaining possibilities:
|
The pstore crash dumps from March reveal a new dimension: at least some crashes are **kernel scheduler bugs in sched_ext**, not just silent hardware-level freezes. The `set_next_task_scx` and `pick_next_task_fair` crash sites, combined with the dbus-daemon segfault before one crash, suggest possible memory corruption that manifests in the scheduler. It's unclear whether this is:
|
||||||
1. A sched_ext kernel bug exposed by the PN51's hardware quirks (unstable TSC, C-state behavior)
|
1. A sched_ext kernel bug exposed by the PN51's hardware quirks (unstable TSC, C-state behavior)
|
||||||
2. ~~Hardware-induced memory corruption that happens to hit scheduler data structures~~ — unlikely after clean memtest
|
2. Hardware-induced memory corruption that happens to hit scheduler data structures
|
||||||
3. A pure software bug in the 6.12.74 kernel's sched_ext implementation
|
3. A pure software bug in the 6.12.74 kernel's sched_ext implementation
|
||||||
|
|
||||||
**pn01 is stable** — zero crashes in 30 days of continuous operation. Both units have identical kernel and NixOS configuration (minus pn02's diagnostic mitigations), so the difference points toward a hardware defect specific to the pn02 board.
|
**pn01 is stable** — zero crashes in 30 days of continuous operation. Both units have identical kernel and NixOS configuration (minus pn02's diagnostic mitigations), so the difference points toward a hardware defect specific to the pn02 board.
|
||||||
|
|
||||||
## Next Steps
|
## Next Steps
|
||||||
|
|
||||||
- **~~pn02 memtest~~**: ~~Run memtest86 for 24h+~~ — Done (2026-03-12): 38 passes over 109 hours, zero errors. RAM is not the issue.
|
- **pn02 memtest**: Run memtest86 for 24h+ (available in systemd-boot menu). The crash signatures (userspace segfaults before kernel panics, corrupted pointers in scheduler structures) are consistent with intermittent RAM errors that a quick pass wouldn't catch. If memtest finds errors, swap the DIMM.
|
||||||
- **pn02 sched_ext test**: Disable sched_ext (`boot.kernelParams = [ "sched_ext.enabled=0" ]` or equivalent) and run for 1-2 weeks to test whether the crashes stop — would help distinguish kernel bug from hardware defect
|
- **pn02**: Consider scrapping or repurposing for non-critical workloads that tolerate random reboots (auto-recovery via hardware watchdog is now working)
|
||||||
- **pn02**: If sched_ext disable doesn't help, consider scrapping or repurposing for non-critical workloads that tolerate random reboots (auto-recovery via hardware watchdog is working)
|
- **pn02 investigation**: Could try disabling sched_ext (`boot.kernelParams = [ "sched_ext.enabled=0" ]` or equivalent) to test whether the crashes stop — would help distinguish kernel bug from hardware defect
|
||||||
- **pn01**: Continue monitoring. If it remains stable long-term, it is viable for light workloads
|
- **pn01**: Continue monitoring. If it remains stable long-term, it is viable for light workloads
|
||||||
- If pn01 eventually crashes, apply the same mitigations (amdgpu blacklist, max_cstate=1) to see if they help
|
- If pn01 eventually crashes, apply the same mitigations (amdgpu blacklist, max_cstate=1) to see if they help
|
||||||
- For the Incus hypervisor plan: likely need different hardware. Evaluating GMKtec G3 (Intel) as an alternative. Note: mixed Intel/AMD cluster complicates live migration
|
- For the Incus hypervisor plan: likely need different hardware. Evaluating GMKtec G3 (Intel) as an alternative. Note: mixed Intel/AMD cluster complicates live migration
|
||||||
|
|||||||
36
flake.lock
generated
36
flake.lock
generated
@@ -7,18 +7,18 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1773079666,
|
"lastModified": 1739310461,
|
||||||
"narHash": "sha256-midgZRnFEybsH3uJazCJcF9i5Tm5hYVH7+oDLAFpLtU=",
|
"narHash": "sha256-GscftfATX84Aae9FObrQOe+hr5MsEma2Fc5fdzuu3hA=",
|
||||||
"ref": "master",
|
"ref": "master",
|
||||||
"rev": "d8c08778f941a459fccae932e3768f9b9fe1783d",
|
"rev": "53915cec6356be1a2d44ac2cbd0a71b32d679e6f",
|
||||||
"revCount": 11,
|
"revCount": 7,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/alerttonotify"
|
"url": "https://git.t-juice.club/torjus/alerttonotify"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"ref": "master",
|
"ref": "master",
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/alerttonotify"
|
"url": "https://git.t-juice.club/torjus/alerttonotify"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"homelab-deploy": {
|
"homelab-deploy": {
|
||||||
@@ -28,18 +28,18 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1773081467,
|
"lastModified": 1771488195,
|
||||||
"narHash": "sha256-K22nYBq4FXe/1NJ/wg0uUbFrutgw2j9axbA/1NvvK8E=",
|
"narHash": "sha256-2kMxqdDyPluRQRoES22Y0oSjp7pc5fj2nRterfmSIyc=",
|
||||||
"ref": "master",
|
"ref": "master",
|
||||||
"rev": "713d1e7584c1e076fcf8e6248e2d022027832e86",
|
"rev": "2d26de50559d8acb82ea803764e138325d95572c",
|
||||||
"revCount": 38,
|
"revCount": 37,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/homelab-deploy"
|
"url": "https://git.t-juice.club/torjus/homelab-deploy"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"ref": "master",
|
"ref": "master",
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/homelab-deploy"
|
"url": "https://git.t-juice.club/torjus/homelab-deploy"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nixos-exporter": {
|
"nixos-exporter": {
|
||||||
@@ -49,17 +49,17 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1773081113,
|
"lastModified": 1770593543,
|
||||||
"narHash": "sha256-99hs9Gvzc+M9hSTY7zSHL7TmhPkOYZ/9li9OhN3kXWc=",
|
"narHash": "sha256-hT8Rj6JAwGDFvcxWEcUzTCrWSiupCfBa57pBDnM2C5g=",
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "79900ae92df5607235f6ddb28eda67270d996819",
|
"rev": "5aa5f7275b7a08015816171ba06d2cbdc2e02d3e",
|
||||||
"revCount": 16,
|
"revCount": 15,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/nixos-exporter"
|
"url": "https://git.t-juice.club/torjus/nixos-exporter"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://code.t-juice.club/torjus/nixos-exporter"
|
"url": "https://git.t-juice.club/torjus/nixos-exporter"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
|
|||||||
15
flake.nix
15
flake.nix
@@ -6,15 +6,15 @@
|
|||||||
nixpkgs-unstable.url = "github:nixos/nixpkgs?ref=nixos-unstable";
|
nixpkgs-unstable.url = "github:nixos/nixpkgs?ref=nixos-unstable";
|
||||||
|
|
||||||
alerttonotify = {
|
alerttonotify = {
|
||||||
url = "git+https://code.t-juice.club/torjus/alerttonotify?ref=master";
|
url = "git+https://git.t-juice.club/torjus/alerttonotify?ref=master";
|
||||||
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
||||||
};
|
};
|
||||||
nixos-exporter = {
|
nixos-exporter = {
|
||||||
url = "git+https://code.t-juice.club/torjus/nixos-exporter";
|
url = "git+https://git.t-juice.club/torjus/nixos-exporter";
|
||||||
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
||||||
};
|
};
|
||||||
homelab-deploy = {
|
homelab-deploy = {
|
||||||
url = "git+https://code.t-juice.club/torjus/homelab-deploy?ref=master";
|
url = "git+https://git.t-juice.club/torjus/homelab-deploy?ref=master";
|
||||||
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
inputs.nixpkgs.follows = "nixpkgs-unstable";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@@ -227,15 +227,6 @@
|
|||||||
./hosts/nrec-nixos01
|
./hosts/nrec-nixos01
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
nrec-nixos02 = nixpkgs.lib.nixosSystem {
|
|
||||||
inherit system;
|
|
||||||
specialArgs = {
|
|
||||||
inherit inputs self;
|
|
||||||
};
|
|
||||||
modules = commonModules ++ [
|
|
||||||
./hosts/nrec-nixos02
|
|
||||||
];
|
|
||||||
};
|
|
||||||
openstack-template = nixpkgs.lib.nixosSystem {
|
openstack-template = nixpkgs.lib.nixosSystem {
|
||||||
inherit system;
|
inherit system;
|
||||||
specialArgs = {
|
specialArgs = {
|
||||||
|
|||||||
@@ -1,16 +0,0 @@
|
|||||||
{ ... }:
|
|
||||||
{
|
|
||||||
# Fetch runner token from Vault
|
|
||||||
vault.secrets.forgejo-runner-token = {
|
|
||||||
secretPath = "hosts/nix-cache02/forgejo-runner-token";
|
|
||||||
extractKey = "token";
|
|
||||||
mode = "0444";
|
|
||||||
services = [ "gitea-runner-actions1" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Override token source and runner capacity
|
|
||||||
services.gitea-actions-runner.instances.actions1 = {
|
|
||||||
tokenFile = "/run/secrets/forgejo-runner-token";
|
|
||||||
settings.runner.capacity = 4;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -3,8 +3,6 @@
|
|||||||
./configuration.nix
|
./configuration.nix
|
||||||
./builder.nix
|
./builder.nix
|
||||||
./scheduler.nix
|
./scheduler.nix
|
||||||
./actions-runner.nix
|
|
||||||
../../services/nix-cache
|
../../services/nix-cache
|
||||||
../../services/actions-runner
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -34,11 +34,6 @@
|
|||||||
autoResize = true;
|
autoResize = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
fileSystems."/var/lib/forgejo/data/packages" = {
|
|
||||||
device = "/dev/disk/by-uuid/25a84568-b36a-47b3-a6d0-b959209cfdaf";
|
|
||||||
fsType = "ext4";
|
|
||||||
};
|
|
||||||
|
|
||||||
boot.loader.grub.enable = true;
|
boot.loader.grub.enable = true;
|
||||||
boot.loader.grub.device = "/dev/vda";
|
boot.loader.grub.device = "/dev/vda";
|
||||||
networking.hostName = "nrec-nixos01";
|
networking.hostName = "nrec-nixos01";
|
||||||
@@ -70,7 +65,7 @@
|
|||||||
|
|
||||||
services.caddy = {
|
services.caddy = {
|
||||||
enable = true;
|
enable = true;
|
||||||
virtualHosts."code.t-juice.club" = {
|
virtualHosts."nrec-nixos01.t-juice.club" = {
|
||||||
extraConfig = ''
|
extraConfig = ''
|
||||||
reverse_proxy 127.0.0.1:3000
|
reverse_proxy 127.0.0.1:3000
|
||||||
'';
|
'';
|
||||||
|
|||||||
@@ -1,85 +0,0 @@
|
|||||||
{ lib, pkgs, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
services.openssh = {
|
|
||||||
enable = true;
|
|
||||||
settings = {
|
|
||||||
PermitRootLogin = lib.mkForce "no";
|
|
||||||
PasswordAuthentication = false;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
users.users.nixos = {
|
|
||||||
isNormalUser = true;
|
|
||||||
extraGroups = [ "wheel" ];
|
|
||||||
shell = pkgs.zsh;
|
|
||||||
openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAwfb2jpKrBnCw28aevnH8HbE5YbcMXpdaVv2KmueDu6 torjus@gunter"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
security.sudo.wheelNeedsPassword = false;
|
|
||||||
programs.zsh.enable = true;
|
|
||||||
|
|
||||||
homelab.dns.enable = false;
|
|
||||||
homelab.monitoring.enable = false;
|
|
||||||
homelab.host.labels.ansible = "false";
|
|
||||||
|
|
||||||
fileSystems."/" = {
|
|
||||||
device = "/dev/disk/by-label/nixos";
|
|
||||||
fsType = "ext4";
|
|
||||||
autoResize = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
boot.loader.grub.enable = true;
|
|
||||||
boot.loader.grub.device = "/dev/vda";
|
|
||||||
networking.hostName = "nrec-nixos02";
|
|
||||||
networking.useNetworkd = true;
|
|
||||||
networking.useDHCP = false;
|
|
||||||
services.resolved.enable = true;
|
|
||||||
|
|
||||||
systemd.network.enable = true;
|
|
||||||
systemd.network.networks."ens3" = {
|
|
||||||
matchConfig.Name = "ens3";
|
|
||||||
networkConfig.DHCP = "ipv4";
|
|
||||||
linkConfig.RequiredForOnline = "routable";
|
|
||||||
};
|
|
||||||
time.timeZone = "Europe/Oslo";
|
|
||||||
|
|
||||||
networking.firewall.enable = true;
|
|
||||||
networking.firewall.allowedTCPPorts = [
|
|
||||||
22
|
|
||||||
80
|
|
||||||
443
|
|
||||||
];
|
|
||||||
|
|
||||||
nix.settings.substituters = [
|
|
||||||
"https://cache.nixos.org"
|
|
||||||
];
|
|
||||||
nix.settings.trusted-public-keys = [
|
|
||||||
"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
|
|
||||||
];
|
|
||||||
|
|
||||||
services.pocket-id = {
|
|
||||||
enable = true;
|
|
||||||
settings = {
|
|
||||||
APP_URL = "https://oidc.t-juice.club";
|
|
||||||
TRUST_PROXY = true;
|
|
||||||
ANALYTICS_DISABLED = true;
|
|
||||||
VERSION_CHECK_DISABLED = true;
|
|
||||||
HOST = "127.0.0.1";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
services.caddy = {
|
|
||||||
enable = true;
|
|
||||||
virtualHosts."oidc.t-juice.club" = {
|
|
||||||
extraConfig = ''
|
|
||||||
reverse_proxy 127.0.0.1:1411
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
zramSwap.enable = true;
|
|
||||||
|
|
||||||
system.stateVersion = "25.11";
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
{ modulesPath, ... }:
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
./configuration.nix
|
|
||||||
../../system/packages.nix
|
|
||||||
../../services/actions-runner
|
|
||||||
(modulesPath + "/profiles/qemu-guest.nix")
|
|
||||||
];
|
|
||||||
}
|
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
boot.loader.systemd-boot.memtest86.enable = true;
|
boot.loader.systemd-boot.memtest86.enable = true;
|
||||||
boot.loader.efi.canTouchEfiVariables = true;
|
boot.loader.efi.canTouchEfiVariables = true;
|
||||||
boot.blacklistedKernelModules = [ "amdgpu" ];
|
boot.blacklistedKernelModules = [ "amdgpu" ];
|
||||||
boot.kernelParams = [ "panic=10" "nmi_watchdog=1" "processor.max_cstate=1" "sched_ext.enabled=0" ];
|
boot.kernelParams = [ "panic=10" "nmi_watchdog=1" "processor.max_cstate=1" ];
|
||||||
boot.kernel.sysctl."kernel.softlockup_panic" = 1;
|
boot.kernel.sysctl."kernel.softlockup_panic" = 1;
|
||||||
boot.kernel.sysctl."kernel.hardlockup_panic" = 1;
|
boot.kernel.sysctl."kernel.hardlockup_panic" = 1;
|
||||||
|
|
||||||
|
|||||||
@@ -1,37 +0,0 @@
|
|||||||
{ config, lib, pkgs, ... }:
|
|
||||||
{
|
|
||||||
# Trust podman interfaces so containers can reach the runner's cache service.
|
|
||||||
# "podman+" is a wildcard matching any interface starting with "podman".
|
|
||||||
networking.firewall.trustedInterfaces = [ "podman+" ];
|
|
||||||
|
|
||||||
virtualisation.podman = {
|
|
||||||
enable = true;
|
|
||||||
dockerCompat = true;
|
|
||||||
dockerSocket.enable = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
services.gitea-actions-runner = {
|
|
||||||
package = pkgs.forgejo-runner;
|
|
||||||
|
|
||||||
instances.actions1 = {
|
|
||||||
enable = true;
|
|
||||||
name = config.networking.hostName;
|
|
||||||
url = "https://code.t-juice.club";
|
|
||||||
tokenFile = lib.mkDefault "/var/lib/forgejo-runner/token";
|
|
||||||
labels = [
|
|
||||||
"nix:docker://code.t-juice.club/torjus/runner-images/nix:latest"
|
|
||||||
"node-bookworm:docker://node:lts-bookworm-slim"
|
|
||||||
"alpine:docker://alpine:latest"
|
|
||||||
"golang:docker://code.t-juice.club/torjus/runner-images/golang:latest"
|
|
||||||
];
|
|
||||||
settings = {
|
|
||||||
runner.capacity = lib.mkDefault 2;
|
|
||||||
cache = {
|
|
||||||
enabled = true;
|
|
||||||
dir = "/var/lib/gitea-runner/actions1/cache";
|
|
||||||
};
|
|
||||||
container.privileged = false;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -1,17 +1,16 @@
|
|||||||
{ pkgs, ... }:
|
{ ... }:
|
||||||
{
|
{
|
||||||
services.forgejo = {
|
services.forgejo = {
|
||||||
package = pkgs.forgejo;
|
|
||||||
enable = true;
|
enable = true;
|
||||||
database.type = "sqlite3";
|
database.type = "sqlite3";
|
||||||
lfs.enable = true;
|
|
||||||
settings = {
|
settings = {
|
||||||
server = {
|
server = {
|
||||||
DOMAIN = "code.t-juice.club";
|
DOMAIN = "nrec-nixos01.t-juice.club";
|
||||||
ROOT_URL = "https://code.t-juice.club/";
|
ROOT_URL = "https://nrec-nixos01.t-juice.club/";
|
||||||
HTTP_ADDR = "127.0.0.1";
|
HTTP_ADDR = "127.0.0.1";
|
||||||
HTTP_PORT = 3000;
|
HTTP_PORT = 3000;
|
||||||
};
|
};
|
||||||
|
server.LFS_START_SERVER = true;
|
||||||
service.DISABLE_REGISTRATION = true;
|
service.DISABLE_REGISTRATION = true;
|
||||||
"service.explore".REQUIRE_SIGNIN_VIEW = true;
|
"service.explore".REQUIRE_SIGNIN_VIEW = true;
|
||||||
session.COOKIE_SECURE = true;
|
session.COOKIE_SECURE = true;
|
||||||
|
|||||||
@@ -38,12 +38,6 @@
|
|||||||
do-udp = "yes";
|
do-udp = "yes";
|
||||||
do-tcp = "yes";
|
do-tcp = "yes";
|
||||||
extended-statistics = true;
|
extended-statistics = true;
|
||||||
|
|
||||||
# Recover faster from upstream failures (e.g. ISP outage)
|
|
||||||
# Default 900s is too long - keeps marking servers as bad
|
|
||||||
infra-host-ttl = 120;
|
|
||||||
# Clean up stale TLS connections faster (default 60s)
|
|
||||||
tcp-reuse-timeout = 15;
|
|
||||||
};
|
};
|
||||||
remote-control = {
|
remote-control = {
|
||||||
control-enable = true;
|
control-enable = true;
|
||||||
|
|||||||
@@ -145,12 +145,6 @@ locals {
|
|||||||
password_length = 64
|
password_length = 64
|
||||||
}
|
}
|
||||||
|
|
||||||
# Forgejo runner token for nix-cache02
|
|
||||||
"hosts/nix-cache02/forgejo-runner-token" = {
|
|
||||||
auto_generate = false
|
|
||||||
data = { token = var.forgejo_runner_token }
|
|
||||||
}
|
|
||||||
|
|
||||||
# Loki push authentication (used by Promtail on all hosts)
|
# Loki push authentication (used by Promtail on all hosts)
|
||||||
"shared/loki/push-auth" = {
|
"shared/loki/push-auth" = {
|
||||||
auto_generate = true
|
auto_generate = true
|
||||||
|
|||||||
@@ -109,10 +109,3 @@ variable "sonarr_api_key" {
|
|||||||
sensitive = true
|
sensitive = true
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "forgejo_runner_token" {
|
|
||||||
description = "Forgejo Actions runner token for nix-cache02"
|
|
||||||
type = string
|
|
||||||
default = "PLACEHOLDER"
|
|
||||||
sensitive = true
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user