MetalLB 运行在 Kubernetes 集群内,监控 Service 对象的变化,一旦监测到有新的 LoadBalancer 类型服务,并且没有可供申请的负载均衡器时,就会完成地址分配和对外宣告两部分的工作。使用 MetalLB 时,MetalLB 会自己为用户的 LoadBalancer 类型 Service 分配 IP 地址,当然该 IP 地址不是凭空产生的,需要用户在配置中提供一个 IP 地址池,MetalLB 将会在其中选取地址分配给服务。
MetalLB 将 IP 分配给某个服务后,它需要对外宣告此 IP 地址,并让外部主机可以路由到此 IP。MetalLB 支持两种声明模式:Layer 2( ARP / NDP )模式或者 BGP 模式。
Layer2 模式
Layer2 模式下,每个 Service 会由集群中的一个 Node 来负责。服务的入口流量全部经由单个节点,然后该节点的 Kube-Proxy 会把流量再转发给服务的 Pods。也就是说,该模式下 MetalLB 并没有真正提供负载均衡器。尽管如此,MetalLB 提供了故障转移功能,如果持有 IP 的节点出现故障,则默认 10 秒后即发生故障转移,IP 会被分配给其它健康的节点。
Layer2 模式的优缺点:
Layer2 模式更为通用,不需要用户有额外的设备;
Layer2 模式下存在单点问题,服务的所有入口流量经由单点,其网络带宽可能成为瓶颈;
由于 Layer 2 模式需要 ARP/NDP 客户端配合,当故障转移发生时,MetalLB 会发送免费 ARP(gratuitous ARP)包来宣告 MAC 地址和 IP 映射关系的变化,因此地址的宣告与切换过程略为繁琐。
BGP模式
当在第三层工作时,集群中所有机器都和你控制的最接近的路由器建立 BGP 会话,此会话让路由器能学习到如何转发针对 K8S 服务 IP 的数据包。
func (a *Announce) SetBalancer(name string, ip net.IP) { a.Lock() defer a.Unlock()
// Kubernetes may inform us that we should advertise this address multiple // times, so just no-op any subsequent requests. if _, ok := a.ips[name]; ok { return } a.ips[name] = ip
a.ipRefcnt[ip.String()]++ if a.ipRefcnt[ip.String()] > 1 { // Multiple services are using this IP, so there's nothing // else to do right now. return }
for _, client := range a.ndps { if err := client.Watch(ip); err != nil { a.logger.Log("op", "watchMulticastGroup", "error", err, "ip", ip, "msg", "failed to watch NDP multicast group for IP, NDP responder will not respond to requests for this address") } }
go a.spam(name) }
func (a *Announce) spam(name string) { start := time.Now() for time.Since(start) < 5*time.Second { if err := a.gratuitous(name); err != nil { a.logger.Log("op", "gratuitousAnnounce", "error", err, "service", name, "msg", "failed to make gratuitous IP announcement") } time.Sleep(1100 * time.Millisecond) } }
func (a *Announce) gratuitous(name string) error { a.Lock() defer a.Unlock()
ip, ok := a.ips[name] if !ok { // No IP means we've lost control of the IP, someone else is // doing announcements. return nil } if ip.To4() != nil { for _, client := range a.arps { if err := client.Gratuitous(ip); err != nil { return err } } } else { for _, client := range a.ndps { if err := client.Gratuitous(ip); err != nil { return err } } } return nil }
func (a *arpResponder) Gratuitous(ip net.IP) error { for _, op := range []arp.Operation{arp.OperationRequest, arp.OperationReply} { pkt, err := arp.NewPacket(op, a.hardwareAddr, ip, ethernet.Broadcast, ip) if err != nil { return fmt.Errorf("assembling %q gratuitous packet for %q: %s", op, ip, err) } if err = a.conn.WriteTo(pkt, ethernet.Broadcast); err != nil { return fmt.Errorf("writing %q gratuitous packet for %q: %s", op, ip, err) } stats.SentGratuitous(ip.String()) } return nil }
func newARPResponder(logger log.Logger, ifi *net.Interface, ann announceFunc) (*arpResponder, error) { client, err := arp.Dial(ifi) if err != nil { return nil, fmt.Errorf("creating ARP responder for %q: %s", ifi.Name, err) }
ret := &arpResponder{ logger: logger, intf: ifi.Name, hardwareAddr: ifi.HardwareAddr, conn: client, closed: make(chan struct{}), announce: ann, } go ret.run() return ret, nil }
// run keeps calling processRequest until it reports dropReasonClosed, then
// returns. Every other drop reason is ignored and the loop continues.
func (a *arpResponder) run() {
	for {
		if a.processRequest() == dropReasonClosed {
			return
		}
	}
}
func (a *arpResponder) processRequest() dropReason { pkt, eth, err := a.conn.Read() if err != nil { // ARP listener doesn't cleanly return EOF when closed, so we // need to hook into the call to arpResponder.Close() // independently. select { case <-a.closed: return dropReasonClosed default: } if err == io.EOF { return dropReasonClosed } return dropReasonError }
// Ignore ARP requests which are not broadcast or bound directly for this machine. if !bytes.Equal(eth.Destination, ethernet.Broadcast) && !bytes.Equal(eth.Destination, a.hardwareAddr) { return dropReasonEthernetDestination }
// Ignore ARP requests that the announcer tells us to ignore. if reason := a.announce(pkt.TargetIP); reason != dropReasonNone { return reason }
stats.GotRequest(pkt.TargetIP.String()) a.logger.Log("interface", a.intf, "ip", pkt.TargetIP, "senderIP", pkt.SenderIP, "senderMAC", pkt.SenderHardwareAddr, "responseMAC", a.hardwareAddr, "msg", "got ARP request for service IP, sending response")