Estructuras de Datos Fundamentales
El subsistema PCI en el kernel de Linux se basa en estructuras de datos complejas para representar la topología del bus, los dispositivos conectados y los recursos de hardware asignados.
Nodo de Bus PCI
/*
 * One PCI bus in the bus hierarchy. Holds the links to related buses, the
 * list of devices attached to the bus, and the resources associated with it.
 */
struct pci_bus_node {
struct list_head sibling_link; // Link to other buses at the same level
struct pci_bus_node *parent_bus; // Parent bus (bridge)
struct list_head child_buses; // List of child buses
struct list_head attached_devices; // Devices attached to this bus
struct pci_device_node *bridge_dev; // The bridge device as seen from the parent
struct list_head physical_slots; // Physical slots
struct hw_resource *address_space[PCI_BUS_NUM_RESOURCES]; // Address space
struct pci_access_ops *access_methods; // Configuration access functions
void *platform_data; // Platform-specific data
unsigned char bus_id; // Bus number
// NOTE(review): the next three presumably mirror the bridge's primary /
// secondary / subordinate bus-number registers -- confirm against the code
// that fills them in.
unsigned char primary_bus;
unsigned char secondary_bus;
unsigned char max_subordinate;
// ... (remaining fields omitted)
};
Nodo de Dispositivo PCI
/*
 * One PCI device (function). Records the bus it lives on, its identification
 * values, and the resource regions it owns.
 */
struct pci_device_node {
struct list_head bus_link; // Node in the bus's device list
struct pci_bus_node *host_bus; // Bus this device resides on
struct pci_bus_node *sub_bus; // Bus bridged by this device
void *platform_data;
struct pci_physical_slot *slot_info;
unsigned int dev_func; // Combined device and function index
unsigned short vendor_id; // Vendor ID
unsigned short device_id; // Device ID
unsigned short sub_vendor_id;
unsigned short sub_device_id;
struct hw_resource mem_regions[DEVICE_COUNT_RESOURCE]; // BAR and ROM regions
// ... (remaining fields omitted)
};
Recurso de Hardware
/*
 * One node of the hardware resource tree: an address interval plus the links
 * that place it under a parent and next to its siblings.
 */
struct hw_resource {
// First address of the interval.
resource_size_t base_addr;
// Last address of the interval; __request_region() stores start + n - 1 here,
// i.e. the bound is inclusive.
resource_size_t limit_addr;
// Name string supplied by whoever requested the resource.
const char *owner_name;
// IORESOURCE_* flag bits.
unsigned long attributes;
// Tree links: enclosing resource, next resource under the same parent, and
// the first resource nested inside this one.
struct hw_resource *parent_node, *next_sibling, *first_child;
};
Funciones Clave del Proceso de Inicialización
Sondeo de Acceso Directo PCI
Solicita espacios de E/S para los mecanismos de configuración Tipo 1 y Tipo 2, determinando el método de acceso soportado por el hardware.
/*
 * Probe for direct (port I/O) access to PCI configuration space.
 *
 * Configuration mechanism Type 1 is tried first, then Type 2; each one only
 * when its bit is set in pci_probe_flags. The I/O port ranges are reserved
 * before the mechanism is verified. They stay reserved when the mechanism is
 * selected and are released again when verification fails.
 *
 * Returns 1 if Type 1 was selected, 2 if Type 2 was selected, 0 if neither.
 */
int __init probe_direct_pci_access(void)
{
	struct hw_resource *cf8_type1;
	struct hw_resource *cf8_type2;
	struct hw_resource *c000_type2;

	if ((pci_probe_flags & PCI_PROBE_CONF1) != 0) {
		cf8_type1 = request_io_region(0xCF8, 8, "PCI Config Type 1");
		if (cf8_type1 != NULL) {
			if (verify_pci_type1()) {
				raw_pci_access_ops = &pci_direct_type1_ops;
				is_cf9_port_safe = true;
				return 1;
			}
			/* Type 1 did not verify: give the ports back. */
			release_hw_resource(cf8_type1);
		}
	}

	if ((pci_probe_flags & PCI_PROBE_CONF2) == 0)
		return 0;

	cf8_type2 = request_io_region(0xCF8, 4, "PCI Config Type 2");
	if (cf8_type2 == NULL)
		return 0;

	c000_type2 = request_io_region(0xC000, 0x1000, "PCI Config Type 2 Ext");
	if (c000_type2 == NULL)
		goto release_cf8;

	if (verify_pci_type2()) {
		raw_pci_access_ops = &pci_direct_type2_ops;
		is_cf9_port_safe = true;
		return 2;
	}

	release_hw_resource(c000_type2);
release_cf8:
	release_hw_resource(cf8_type2);
	return 0;
}
Validación del Puente Host
Verifica la presencia de un puente Host-PCI válido en el bus raíz para asegurar la integridad del sistema.
/*
 * Sanity-check a configuration access method by looking for a plausible
 * device on bus 0.
 *
 * @ops: access method under test; its read_reg callback is used for all reads
 *       and is treated as failed when it returns non-zero.
 *
 * Returns 1 when the method is accepted: checks are disabled through
 * PCI_NO_CHECKS, the BIOS year is 2001 or later, or some device/function on
 * bus 0 reports class host bridge / VGA display or vendor Intel / Compaq.
 * Otherwise logs a warning and returns 0.
 */
static int __init validate_host_bridge(struct pci_raw_access_ops *ops)
{
	u32 reg_val = 0;
	/*
	 * Start from 0 so that the comparison below is well defined even if
	 * dmi_get_date() does not write the year on failure.
	 */
	int bios_year = 0;
	int dev_func;

	if (pci_probe_flags & PCI_NO_CHECKS)
		return 1;

	dmi_get_date(DMI_BIOS_DATE, &bios_year, NULL, NULL);
	if (bios_year >= 2001)
		return 1;

	for (dev_func = 0; dev_func < 0x100; dev_func++) {
		if (ops->read_reg(0, 0, dev_func, PCI_CLASS_DEVICE, 2, &reg_val))
			continue;
		if (reg_val == PCI_CLASS_BRIDGE_HOST || reg_val == PCI_CLASS_DISPLAY_VGA)
			return 1;

		if (ops->read_reg(0, 0, dev_func, PCI_VENDOR_ID, 2, &reg_val))
			continue;
		if (reg_val == PCI_VENDOR_ID_INTEL || reg_val == PCI_VENDOR_ID_COMPAQ)
			return 1;
	}

	pr_warn("PCI: Host bridge validation failed\n");
	return 0;
}
Inicialización de MMCONFIG
Configura el acceso a la memoria mapeada para el espacio de configuración PCI extendido (MMCONFIG).
/*
 * Early MMCONFIG bring-up. Does nothing unless PCI_PROBE_MMCONF is set in
 * pci_probe_flags.
 *
 * When check_mmcfg_hostbridge() succeeds, known_bridge is set; otherwise the
 * ACPI MCFG table is parsed with parse_mcfg_table(). In both cases
 * initialize_mmcfg(1) and setup_apei_filter() are then called.
 */
void __init early_mmcfg_setup(void)
{
	if (!(pci_probe_flags & PCI_PROBE_MMCONF))
		return;

	if (check_mmcfg_hostbridge()) {
		known_bridge = 1;
	} else {
		acpi_parse_table(ACPI_SIG_MCFG, parse_mcfg_table);
	}

	initialize_mmcfg(1);
	setup_apei_filter();
}
/*
 * Map every region on pci_mmcfg_list and, if all of them map, install
 * mmcfg_access_ops as the extended configuration access method.
 *
 * Returns 1 on success. If any region fails to map, all mappings are torn
 * down with cleanup_mmcfg_mappings() and 0 is returned.
 */
int __init arch_mmcfg_setup(void)
{
	struct pci_mmcfg_region *cfg;

	list_for_each_entry(cfg, &pci_mmcfg_list, list_node) {
		if (map_mmcfg_region(cfg) == 0)
			continue;

		cleanup_mmcfg_mappings();
		return 0;
	}

	pci_extended_ops = &mmcfg_access_ops;
	return 1;
}
/*
 * Map one MMCONFIG region and store the mapping in region_node->virtual_addr.
 *
 * @region_node: region to map; its virtual_addr field is overwritten.
 *
 * Returns 0 on success. When the mapping fails, logs the region's resource
 * and returns -ENOMEM.
 */
int map_mmcfg_region(struct pci_mmcfg_region *region_node)
{
	region_node->virtual_addr = ioremap_mmcfg(region_node);
	if (!region_node->virtual_addr) {
		/* %pR expects a pointer to the region's resource. */
		pr_err("Failed to map MMCONFIG region at %pR\n", &region_node->res);
		return -ENOMEM;
	}
	return 0;
}
/*
 * Create an uncached mapping that covers buses start_bus..end_bus of an
 * MMCONFIG region.
 *
 * The returned pointer is biased downwards by the offset of start_bus, so a
 * caller can add MMCFG_BUS_OFFSET(bus) for any bus in the range without first
 * subtracting start_bus.
 *
 * Returns the biased mapping, or NULL if ioremap_nocache() failed.
 */
static void __iomem *ioremap_mmcfg(struct pci_mmcfg_region *region_node)
{
	u64 phys_start = region_node->base_address +
			 MMCFG_BUS_OFFSET(region_node->start_bus);
	int bus_count = region_node->end_bus - region_node->start_bus + 1;
	u64 map_size = MMCFG_BUS_OFFSET(bus_count);
	void __iomem *window = ioremap_nocache(phys_start, map_size);

	if (!window)
		return window;

	return window - MMCFG_BUS_OFFSET(region_node->start_bus);
}
Análisis de la Tabla ACPI MCFG
Extrae y procesa las asignaciones de memoria de la tabla MCFG proporcionada por el firmware ACPI.
/*
 * Fixed-size leading part of the ACPI MCFG table. parse_mcfg_table() treats
 * the bytes immediately after this structure as an array of
 * struct acpi_mcfg_allocation entries.
 */
struct acpi_mcfg_table {
struct acpi_table_header header; // Common ACPI table header
u8 reserved_bytes[8]; // Reserved
};
/*
 * One entry of the MCFG table. parse_mcfg_table() passes segment_group,
 * start_bus, end_bus and base_address to add_mmcfg_region().
 */
struct acpi_mcfg_allocation {
u64 base_address; // Base address of the configuration window
u16 segment_group; // PCI segment group the entry applies to
u8 start_bus; // First bus number covered
u8 end_bus; // Last bus number covered
u32 reserved;
};
/*
 * Parse the ACPI MCFG table and register one MMCONFIG region per entry.
 *
 * @header: pointer to the table; the allocation entries are expected to start
 *          directly after the fixed struct acpi_mcfg_table part.
 *
 * Any previously registered regions are cleared first. On every failure path
 * after that the region list is left empty.
 *
 * Returns 0 on success, -EINVAL if @header is NULL, -ENODEV if the table has
 * no complete entry or an entry fails validate_mcfg_entry(), and -ENOMEM if
 * add_mmcfg_region() fails.
 */
static int __init parse_mcfg_table(struct acpi_table_header *header)
{
	struct acpi_mcfg_table *mcfg_header;
	struct acpi_mcfg_allocation *alloc_table, *current_alloc;
	unsigned long payload_bytes = 0;
	unsigned long entry_count;
	unsigned long i;

	if (!header)
		return -EINVAL;

	mcfg_header = (struct acpi_mcfg_table *)header;
	clear_all_mmcfg_entries();

	/*
	 * The length comes from firmware. A table shorter than the fixed part
	 * must not be allowed to wrap the unsigned subtraction; treat it as
	 * having no entries.
	 */
	if (header->table_length > sizeof(struct acpi_mcfg_table))
		payload_bytes = header->table_length - sizeof(struct acpi_mcfg_table);

	/* Only complete entries count; a trailing partial entry is ignored. */
	entry_count = payload_bytes / sizeof(struct acpi_mcfg_allocation);
	if (entry_count == 0) {
		pr_err("MMCONFIG table is empty\n");
		return -ENODEV;
	}

	alloc_table = (struct acpi_mcfg_allocation *)&mcfg_header[1];
	for (i = 0; i < entry_count; i++) {
		current_alloc = &alloc_table[i];

		if (validate_mcfg_entry(mcfg_header, current_alloc)) {
			clear_all_mmcfg_entries();
			return -ENODEV;
		}

		if (!add_mmcfg_region(current_alloc->segment_group, current_alloc->start_bus,
				      current_alloc->end_bus, current_alloc->base_address)) {
			pr_warn("Insufficient memory for MCFG entries\n");
			clear_all_mmcfg_entries();
			return -ENOMEM;
		}
	}

	return 0;
}
Escaneo de Buses Subordinados
Implementa un algoritmo de búsqueda en profundidad para descubrir todos los dispositivos y puentes en el árbol PCI.
/*
 * Scan a bus for devices and then descend through every bridge found on it.
 *
 * @bus: bus to scan.
 *
 * Every eighth dev_func value from 0 to 0xF8 is passed to
 * scan_physical_slot(). The running maximum starts at bus->secondary_bus and
 * is increased by calculate_iov_bus_range(). A bus not yet marked as added
 * gets apply_arch_bus_fixups(); only a root bus is then marked as added here.
 * Finally all PCI-PCI and CardBus bridges on the bus are handed to
 * scan_pci_bridge() in two passes (pass 0, then pass 1).
 *
 * Returns the highest bus number reported back by the bridge scans.
 */
unsigned int __devinit scan_subordinate_buses(struct pci_bus_node *bus)
{
	unsigned int highest_bus = bus->secondary_bus;
	unsigned int slot_base;
	unsigned int pass;
	struct pci_device_node *child;

	for (slot_base = 0; slot_base < 0x100; slot_base += 8)
		scan_physical_slot(bus, slot_base);

	highest_bus += calculate_iov_bus_range(bus);

	if (!bus->is_added) {
		apply_arch_bus_fixups(bus);
		if (is_root_bus(bus))
			bus->is_added = 1;
	}

	for (pass = 0; pass < 2; pass++) {
		list_for_each_entry(child, &bus->attached_devices, bus_link) {
			if (child->header_type != PCI_HEADER_TYPE_BRIDGE &&
			    child->header_type != PCI_HEADER_TYPE_CARDBUS)
				continue;

			highest_bus = scan_pci_bridge(bus, child, highest_bus, pass);
		}
	}

	return highest_bus;
}
Lectura de Registros BAR
Determina el tamaño y la dirección base de los registros de dirección base (BAR) de los dispositivos PCI.
/*
 * Size one base address register (BAR or expansion ROM register) and fill in
 * the matching resource.
 *
 * @dev:      device that owns the register.
 * @bar_type: pci_bar_unknown to decode the type from the register contents;
 *            any other value makes the register be treated as a ROM register.
 * @res:      resource to fill in (owner_name, attributes, base_addr,
 *            limit_addr).
 * @offset:   configuration-space offset of the register.
 *
 * The register is sized by saving its value, writing an all-ones mask,
 * reading back which bits stuck, and restoring the saved value. For a 64-bit
 * memory BAR the same is done for the upper dword at @offset + 4.
 *
 * On failure res->attributes is cleared.
 *
 * Returns 1 if the register turned out to be a 64-bit memory BAR (so it
 * occupies two register slots), 0 otherwise. This holds on the failure path
 * too, so a caller stepping through BARs skips the upper half of a 64-bit
 * BAR that could not be sized.
 */
int read_pci_base_address(struct pci_device_node *dev, enum pci_bar_type bar_type,
			  struct hw_resource *res, unsigned int offset)
{
	u32 original_val, size_val, mask;

	mask = bar_type ? ~PCI_ROM_ADDRESS_ENABLE : ~0;
	res->owner_name = get_pci_device_name(dev);

	pci_read_dword(dev, offset, &original_val);
	pci_write_dword(dev, offset, mask);
	pci_read_dword(dev, offset, &size_val);
	pci_write_dword(dev, offset, original_val);

	/* No writable bits, or every bit reads back as 1: nothing usable. */
	if (!size_val || size_val == 0xffffffff)
		goto error_exit;

	if (original_val == 0xffffffff)
		original_val = 0;

	if (bar_type == pci_bar_unknown) {
		bar_type = decode_bar_type(res, original_val);
		res->attributes |= calculate_resource_flags(original_val) | IORESOURCE_SIZEALIGN;
		if (bar_type == pci_bar_io) {
			original_val &= PCI_BASE_ADDRESS_IO_MASK;
			mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT;
		} else {
			original_val &= PCI_BASE_ADDRESS_MEM_MASK;
			mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
		}
	} else {
		res->attributes |= (original_val & IORESOURCE_ROM_ENABLE);
		original_val &= PCI_ROM_ADDRESS_MASK;
		mask = (u32)PCI_ROM_ADDRESS_MASK;
	}

	if (bar_type == pci_bar_mem64) {
		u64 val_64 = original_val;
		u64 size_64 = size_val;
		u64 mask_64 = mask | (u64)~0 << 32;

		/* From here on original_val / size_val hold the UPPER dword. */
		pci_read_dword(dev, offset + 4, &original_val);
		pci_write_dword(dev, offset + 4, ~0);
		pci_read_dword(dev, offset + 4, &size_val);
		pci_write_dword(dev, offset + 4, original_val);

		val_64 |= ((u64)original_val << 32);
		size_64 |= ((u64)size_val << 32);
		size_64 = calculate_pci_size(val_64, size_64, mask_64);
		if (!size_64)
			goto error_exit;

		if ((sizeof(resource_size_t) < 8) && (size_64 > 0x100000000ULL)) {
			dev_err(&dev->dev, "64-bit BAR not supported\n");
			goto error_exit;
		} else if ((sizeof(resource_size_t) < 8) && original_val) {
			/*
			 * Only an address with a non-zero upper dword is out of
			 * reach of a 32-bit resource_size_t. Testing the whole
			 * 64-bit value here would disable every programmed BAR.
			 */
			pci_write_dword(dev, offset, 0);
			pci_write_dword(dev, offset + 4, 0);
			res->base_addr = 0;
			res->limit_addr = size_64;
		} else {
			res->base_addr = val_64;
			res->limit_addr = val_64 + size_64;
		}
		res->attributes |= IORESOURCE_MEM_64;
	} else {
		size_val = calculate_pci_size(original_val, size_val, mask);
		if (!size_val)
			goto error_exit;
		res->base_addr = original_val;
		res->limit_addr = original_val + size_val;
	}

out:
	return (bar_type == pci_bar_mem64) ? 1 : 0;

error_exit:
	res->attributes = 0;
	goto out;
}
/*
 * Derive the extent (size minus one) of a BAR from its sizing read-back.
 *
 * @base:    value the register held before sizing.
 * @maxbase: value read back after writing the all-ones mask.
 * @mask:    bits of the register that carry address information.
 *
 * Returns 0 when no address bit is writable, or when @base equals @maxbase
 * but the result is not consistent with the register having been programmed
 * to all ones. Otherwise returns the extent.
 */
static u64 calculate_pci_size(u64 base, u64 maxbase, u64 mask)
{
	u64 significant = maxbase & mask;
	u64 lowest_bit;
	u64 extent;

	if (significant == 0)
		return 0;

	/* The lowest writable address bit gives the decode size. */
	lowest_bit = significant & ~(significant - 1);
	extent = lowest_bit - 1;

	if (base != maxbase)
		return extent;

	if (((base | extent) & mask) != mask)
		return 0;

	return extent;
}
Ventanas de Filtrado de Puentes
Lee las ventanas de E/S y memoria de los puentes PCI para enrutar correctamente las transacciones hacia los buses secundarios.
/*
 * Read the I/O, memory and prefetchable-memory forwarding windows of the
 * bridge leading to @child_bus and record them in child_bus->address_space[0],
 * [1] and [2] respectively.
 *
 * @child_bus: bus on the secondary side of the bridge. Nothing is done for a
 *             root bus.
 *
 * A window is only recorded when its base does not exceed its limit. For the
 * I/O window an already non-zero base_addr / limit_addr is kept.
 *
 * If the prefetchable window has non-zero upper 32 address bits and
 * unsigned long cannot hold them, an error is logged and the function returns
 * without recording that window.
 */
void __devinit read_bridge_windows(struct pci_bus_node *child_bus)
{
	struct pci_device_node *bridge_dev = child_bus->bridge_dev;
	u8 io_base_low, io_limit_low;
	u16 mem_base_low, mem_limit_low;
	unsigned long base_addr, limit_addr;
	struct hw_resource *res;

	if (is_root_bus(child_bus))
		return;

	/* I/O window */
	res = child_bus->address_space[0];
	pci_read_byte(bridge_dev, PCI_IO_BASE, &io_base_low);
	pci_read_byte(bridge_dev, PCI_IO_LIMIT, &io_limit_low);
	base_addr = ((unsigned long)io_base_low & PCI_IO_RANGE_MASK) << 8;
	limit_addr = ((unsigned long)io_limit_low & PCI_IO_RANGE_MASK) << 8;
	if ((io_base_low & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
		u16 io_base_high, io_limit_high;

		pci_read_word(bridge_dev, PCI_IO_BASE_UPPER16, &io_base_high);
		pci_read_word(bridge_dev, PCI_IO_LIMIT_UPPER16, &io_limit_high);
		/*
		 * Widen before shifting: a u16 is promoted to int, so a value
		 * with bit 15 set would shift into the sign bit and then
		 * sign-extend when OR-ed into a 64-bit unsigned long.
		 */
		base_addr |= (unsigned long)io_base_high << 16;
		limit_addr |= (unsigned long)io_limit_high << 16;
	}
	if (base_addr <= limit_addr) {
		res->attributes = (io_base_low & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
		if (!res->base_addr)
			res->base_addr = base_addr;
		if (!res->limit_addr)
			res->limit_addr = limit_addr + 0xfff;
	}

	/* Memory window */
	res = child_bus->address_space[1];
	pci_read_word(bridge_dev, PCI_MEMORY_BASE, &mem_base_low);
	pci_read_word(bridge_dev, PCI_MEMORY_LIMIT, &mem_limit_low);
	base_addr = ((unsigned long)mem_base_low & PCI_MEMORY_RANGE_MASK) << 16;
	limit_addr = ((unsigned long)mem_limit_low & PCI_MEMORY_RANGE_MASK) << 16;
	if (base_addr <= limit_addr) {
		res->attributes = (mem_base_low & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
		res->base_addr = base_addr;
		res->limit_addr = limit_addr + 0xfffff;
	}

	/* Prefetchable memory window */
	res = child_bus->address_space[2];
	pci_read_word(bridge_dev, PCI_PREF_MEMORY_BASE, &mem_base_low);
	pci_read_word(bridge_dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_low);
	base_addr = ((unsigned long)mem_base_low & PCI_PREF_RANGE_MASK) << 16;
	limit_addr = ((unsigned long)mem_limit_low & PCI_PREF_RANGE_MASK) << 16;
	if ((mem_base_low & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
		u32 mem_base_high, mem_limit_high;

		pci_read_dword(bridge_dev, PCI_PREF_BASE_UPPER32, &mem_base_high);
		pci_read_dword(bridge_dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_high);
		/* Upper halves with base > limit are ignored, as before. */
		if (mem_base_high <= mem_limit_high) {
			if (sizeof(unsigned long) >= 8) {
				/*
				 * Shift in a 64-bit type: shifting a 32-bit
				 * unsigned long by 32 is undefined.
				 */
				base_addr |= (unsigned long)((u64)mem_base_high << 32);
				limit_addr |= (unsigned long)((u64)mem_limit_high << 32);
			} else if (mem_base_high || mem_limit_high) {
				dev_err(&bridge_dev->dev,
					"can't handle 64-bit address space for bridge\n");
				return;
			}
		}
	}
	if (base_addr <= limit_addr) {
		res->attributes = (mem_base_low & PCI_PREF_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
		if (res->attributes & PCI_PREF_RANGE_TYPE_64)
			res->attributes |= IORESOURCE_MEM_64;
		res->base_addr = base_addr;
		res->limit_addr = limit_addr + 0xfffff;
	}
}
Asignación y Actualización de Recursos
Busca espacios libres en el árbol de recursos y actualiza los registros BAR con las direcciones asignadas.
/*
 * Try to claim resource @index of @dev inside its parent resource.
 *
 * If no parent is found, the parent is itself marked IORESOURCE_UNSET, or
 * request_hw_resource() fails, the resource is marked IORESOURCE_UNSET and
 * rebased to start at 0 while keeping its length (limit_addr becomes the old
 * limit minus the old base).
 */
static inline void __devinit allocate_device_resource(struct pci_device_node *dev, int index)
{
	struct hw_resource *res = &dev->mem_regions[index];
	struct hw_resource *parent = find_parent_resource(dev, res);

	if (parent && !(parent->attributes & IORESOURCE_UNSET) &&
	    request_hw_resource(parent, res) >= 0)
		return;

	res->attributes |= IORESOURCE_UNSET;
	res->limit_addr -= res->base_addr;
	res->base_addr = 0;
}
/*
 * Find a free interval of @size bytes among the children of @root.
 *
 * @root:       resource whose child list is searched.
 * @new_res:    receives the interval in base_addr / limit_addr; both fields
 *              are also used as scratch space during the search.
 * @size:       number of bytes required.
 * @min, @max:  bounds the interval is clamped to.
 * @align:      alignment applied to the start with ALIGN().
 * @align_func: optional callback invoked after alignment; it may adjust
 *              @new_res.
 * @func_data:  first argument passed to @align_func.
 *
 * The gaps between consecutive children are examined in list order, followed
 * by the gap after the last child. A first child starting at address 0 is
 * skipped up front because "its base minus one" would wrap around.
 *
 * Returns 0 with @new_res set to the interval found, or -EBUSY if no gap is
 * large enough.
 */
static int locate_free_resource(struct hw_resource *root, struct hw_resource *new_res,
				resource_size_t size, resource_size_t min,
				resource_size_t max, resource_size_t align,
				void (*align_func)(void *, struct hw_resource *, resource_size_t, resource_size_t),
				void *func_data)
{
	struct hw_resource *next = root->first_child;

	new_res->base_addr = root->base_addr;
	if (next != NULL && next->base_addr == 0) {
		new_res->base_addr = next->limit_addr + 1;
		next = next->next_sibling;
	}

	for (;;) {
		/* The candidate gap ends just before the next child, if any. */
		new_res->limit_addr = next ? next->base_addr - 1 : root->limit_addr;

		if (new_res->base_addr < min)
			new_res->base_addr = min;
		if (new_res->limit_addr > max)
			new_res->limit_addr = max;

		new_res->base_addr = ALIGN(new_res->base_addr, align);
		if (align_func)
			align_func(func_data, new_res, size, align);

		if (new_res->base_addr < new_res->limit_addr &&
		    new_res->limit_addr - new_res->base_addr >= size - 1) {
			new_res->limit_addr = new_res->base_addr + size - 1;
			return 0;
		}

		if (next == NULL)
			return -EBUSY;

		new_res->base_addr = next->limit_addr + 1;
		next = next->next_sibling;
	}
}
/*
 * Write the address assigned to resource @res_index of @dev back into the
 * corresponding base address register.
 *
 * @dev:       device to update.
 * @res_index: index into dev->mem_regions.
 *
 * Nothing is written when the resource has no attributes, is marked
 * IORESOURCE_PCI_FIXED, has no register offset, or is a ROM register without
 * IORESOURCE_ROM_ENABLE. After the write the register is read back and a
 * mismatch in the address bits is logged. For a 64-bit memory BAR the upper
 * dword is written as well. IORESOURCE_UNSET is cleared at the end.
 */
void update_pci_resource(struct pci_device_node *dev, int res_index)
{
	struct pci_bus_region region;
	u32 new_val, check_val, mask;
	int reg_offset;
	enum pci_bar_type bar_type;
	struct hw_resource *res = dev->mem_regions + res_index;

	if (!res->attributes || (res->attributes & IORESOURCE_PCI_FIXED))
		return;

	convert_resource_to_bus(dev, &region, res);

	new_val = region.start | (res->attributes & PCI_REGION_FLAG_MASK);
	mask = (res->attributes & IORESOURCE_IO) ?
		(u32)PCI_BASE_ADDRESS_IO_MASK : (u32)PCI_BASE_ADDRESS_MEM_MASK;

	reg_offset = get_resource_bar_offset(dev, res_index, &bar_type);
	if (!reg_offset)
		return;

	/* Any type other than pci_bar_unknown denotes the ROM register here. */
	if (bar_type != pci_bar_unknown && !(res->attributes & IORESOURCE_ROM_ENABLE))
		return;
	if (bar_type != pci_bar_unknown)
		new_val |= PCI_ROM_ADDRESS_ENABLE;

	pci_write_dword(dev, reg_offset, new_val);
	pci_read_dword(dev, reg_offset, &check_val);
	if ((new_val ^ check_val) & mask)
		dev_err(&dev->dev, "BAR %d update mismatch\n", res_index);

	if ((new_val & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
	    (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) {
		/*
		 * Two 16-bit shifts instead of one 32-bit shift: a single
		 * ">> 32" is undefined when region.start is a 32-bit type.
		 */
		new_val = region.start >> 16 >> 16;
		pci_write_dword(dev, reg_offset + 4, new_val);
	}

	res->attributes &= ~IORESOURCE_UNSET;
}
Operaciones de Acceso al Espacio de Configuración
Macros y funciones de bajo nivel para leer y escribir en el espacio de configuración PCI, abstrayendo las diferencias entre el acceso por puertos de E/S y el acceso mapeado en memoria.
Acceso Tipo 1 (Puertos de E/S)
/*
 * Read from PCI configuration space through the Type 1 I/O port mechanism
 * (address port 0xCF8, data port 0xCFC).
 *
 * @seg:      segment; must be 0 for this mechanism.
 * @bus:      bus number, 0..255.
 * @dev_func: combined device/function number, 0..255.
 * @reg:      register offset, 0..4095.
 * @len:      access width in bytes: 1, 2 or 4.
 * @value:    receives the value read; set to all ones on error.
 *
 * The address write and the data read are done under pci_config_lock with
 * interrupts disabled.
 *
 * Returns 0 on success, -EINVAL when any argument is out of range.
 */
static int pci_type1_read(unsigned int seg, unsigned int bus,
			  unsigned int dev_func, int reg, int len, u32 *value)
{
	unsigned long irq_flags;

	/*
	 * reg is signed, so negative offsets are rejected explicitly. An
	 * unsupported width is rejected here as well, rather than returning
	 * success with *value left unwritten.
	 */
	if (seg || (bus > 255) || (dev_func > 255) || (reg < 0) || (reg > 4095) ||
	    (len != 1 && len != 2 && len != 4)) {
		*value = -1;
		return -EINVAL;
	}

	raw_spin_lock_irqsave(&pci_config_lock, irq_flags);
	outl(PCI_CONF1_ADDRESS(bus, dev_func, reg), 0xCF8);
	switch (len) {
	case 1:
		*value = inb(0xCFC + (reg & 3));
		break;
	case 2:
		*value = inw(0xCFC + (reg & 2));
		break;
	default: /* len == 4, validated above */
		*value = inl(0xCFC);
		break;
	}
	raw_spin_unlock_irqrestore(&pci_config_lock, irq_flags);

	return 0;
}
Acceso MMCONFIG (Memoria Mapeada)
/*
 * Read from PCI configuration space through a memory-mapped (MMCONFIG)
 * region.
 *
 * @seg:      segment, used to look up the region.
 * @bus:      bus number, 0..255.
 * @dev_func: combined device/function number, 0..255.
 * @reg:      register offset, 0..4095.
 * @len:      access width in bytes: 1, 2 or 4.
 * @value:    receives the value read; set to all ones on error.
 *
 * The region lookup and the access both happen inside an RCU read-side
 * critical section.
 *
 * Returns 0 on success, -EINVAL when an argument is out of range or no mapped
 * region covers @seg / @bus.
 */
static int mmcfg_read(unsigned int seg, unsigned int bus,
		      unsigned int dev_func, int reg, int len, u32 *value)
{
	char __iomem *mapped_addr;

	/*
	 * reg is signed, so negative offsets are rejected explicitly. An
	 * unsupported width is rejected here as well, rather than returning
	 * success with *value left unwritten.
	 */
	if (unlikely((bus > 255) || (dev_func > 255) || (reg < 0) || (reg > 4095) ||
		     (len != 1 && len != 2 && len != 4))) {
		*value = -1;
		return -EINVAL;
	}

	rcu_read_lock();
	mapped_addr = get_mmcfg_device_base(seg, bus, dev_func);
	if (!mapped_addr) {
		rcu_read_unlock();
		*value = -1;
		return -EINVAL;
	}

	switch (len) {
	case 1:
		*value = mmio_readb(mapped_addr + reg);
		break;
	case 2:
		*value = mmio_readw(mapped_addr + reg);
		break;
	default: /* len == 4, validated above */
		*value = mmio_readl(mapped_addr + reg);
		break;
	}
	rcu_read_unlock();

	return 0;
}
/*
 * Return the mapped address at which the configuration space of one
 * device/function starts.
 *
 * The region is looked up by @seg and @bus. The result is the region's
 * virtual_addr plus the bus offset combined with dev_func shifted left by 12.
 *
 * Returns NULL when no region is found or the region is not mapped.
 */
static char __iomem *get_mmcfg_device_base(unsigned int seg, unsigned int bus, unsigned int dev_func)
{
	struct pci_mmcfg_region *region = lookup_mmcfg_region(seg, bus);

	if (!region || !region->virtual_addr)
		return NULL;

	return region->virtual_addr + (MMCFG_BUS_OFFSET(bus) | (dev_func << 12));
}
Gestión del Árbol de Recursos
El kernel mantiene un árbol de recursos para evitar conflictos de direcciones entre dispositivos. Las siguientes funciones gestionan la inserción y validación de nuevos intervalos de memoria o E/S.
/*
 * Allocate a resource covering [start, start + n - 1] and insert it into the
 * resource tree as busy.
 *
 * @parent: resource to insert under.
 * @start:  first address of the region.
 * @n:      length of the region.
 * @name:   stored as the owner name.
 * @flags:  extra attribute bits, OR-ed with IORESOURCE_BUSY.
 *
 * If the region conflicts with an existing child that is not busy, insertion
 * is retried inside that child. A conflict with a busy child, or with the
 * parent itself, fails the request.
 *
 * The tree is modified with resource_lock held for writing.
 *
 * Returns the new resource, or NULL on allocation failure or conflict; the
 * allocation is freed in the conflict case.
 */
struct hw_resource * __request_region(struct hw_resource *parent,
				      resource_size_t start, resource_size_t n,
				      const char *name, int flags)
{
	struct hw_resource *conflict;
	struct hw_resource *res = kzalloc(sizeof(*res), GFP_KERNEL);

	if (res == NULL)
		return NULL;

	res->owner_name = name;
	res->base_addr = start;
	res->limit_addr = start + n - 1;
	res->attributes = IORESOURCE_BUSY | flags;

	write_lock(&resource_lock);
	for (;;) {
		conflict = __request_resource(parent, res);
		if (conflict == NULL)
			goto unlock;

		/* A clash with the parent or with a busy child is final. */
		if (conflict == parent || (conflict->attributes & IORESOURCE_BUSY))
			break;

		/* Otherwise descend into the non-busy child and retry. */
		parent = conflict;
	}

	kfree(res);
	res = NULL;
unlock:
	write_unlock(&resource_lock);
	return res;
}
/*
 * Insert @new_res into the address-ordered child list of @root.
 *
 * Returns NULL when the resource was linked in. Returns @root when the
 * interval is inverted or does not lie inside @root, and the overlapping
 * child when the interval collides with an existing one.
 */
static struct hw_resource * __request_resource(struct hw_resource *root, struct hw_resource *new_res)
{
	resource_size_t first = new_res->base_addr;
	resource_size_t last = new_res->limit_addr;
	struct hw_resource **link;
	struct hw_resource *cur;

	if (last < first)
		return root;
	if (first < root->base_addr)
		return root;
	if (last > root->limit_addr)
		return root;

	for (link = &root->first_child; ; link = &cur->next_sibling) {
		cur = *link;

		/* End of list, or the next child starts after us: insert. */
		if (cur == NULL || cur->base_addr > last) {
			new_res->next_sibling = cur;
			*link = new_res;
			new_res->parent_node = root;
			return NULL;
		}

		/* This child does not end before we start: overlap. */
		if (cur->limit_addr >= first)
			return cur;
	}
}