-- NOTE(review): `log` is not used by the code visible in this file; it is
-- kept because `logging.new` is defined elsewhere and may have side effects
-- (e.g. registering the "tablerows" logger) -- confirm before removing.
local log = logging.new("tablerows")

--- DOM filter that tidies up table rows.
-- For every `table` element in the document it:
--   * removes rows that consist of a single empty cell, unless the row id
--     is mentioned in one of the linked CSS files,
--   * removes `hr` elements found under `.hline`,
--   * for tables whose class contains "longtable", removes the last row
--     when all of its `td` cells are empty.
-- @param dom DOM object; must provide `query_selector`
-- @return the same `dom` object, modified in place
return function(dom)

  --- Tell whether a node contains child elements that count as content.
  -- A single `p` child with whitespace-only text is not counted, because
  -- longtable puts an empty <p> element into the cells of empty rows.
  -- @param child DOM node (typically a table cell)
  -- @return true when the node has at least one significant child element
  local function has_child_elements(child)
    local children = child:get_children()
    local child_elements = 0
    -- Remember the last *element* child. Looking at the last child of any
    -- kind would miss `<p></p>` followed by a whitespace text node.
    local last_element
    for _, el in ipairs(children) do
      if el:is_element() then
        child_elements = child_elements + 1
        last_element = el
      end
    end
    if child_elements == 1
      and last_element:get_element_name() == "p"
      and child:get_text():gsub("%s", "") == ""
    then
      child_elements = 0
    end
    return child_elements > 0
  end

  --- Tell whether a row holds exactly one element child and that child
  -- has neither significant child elements nor non-whitespace text.
  -- Rows with class "hline" or "cline" are never reported as empty.
  -- @param row DOM element for a table row
  -- @return true when the row is considered empty
  local function is_empty_row(row)
    -- ignore hline/cline rows
    local row_class = row:get_attribute("class")
    if row_class == "hline" or row_class == "cline" then
      return false
    end
    local not_empty = false
    local element_count = 0
    for _, child in ipairs(row:get_children() or {}) do
      if child:is_element() then
        element_count = element_count + 1
        -- empty rows contain only one element, and that element must not
        -- have child elements of its own
        if element_count > 1 or has_child_elements(child) then
          return false
        end
        -- detect whether the cell contains anything but whitespace
        not_empty = child:get_text():gsub("%s", "") ~= "" or not_empty
      end
    end
    return element_count == 1 and not not_empty
  end

  --- Tell whether the row id is absent from the loaded CSS text.
  -- The test is a substring search for "#<id>", so a longer id that starts
  -- with the same characters also counts as a match; in that case the row
  -- is kept, which is the safe outcome.
  -- @param row DOM element for a table row
  -- @param css string with the concatenated contents of the CSS files
  -- @return true when the row has no id or the id does not occur in `css`
  local function is_not_styled(row, css)
    local id = row:get_attribute("id")
    if not id then return true end -- no styling without id
    -- Plain-text search: the id is not a Lua pattern, and ids containing
    -- magic characters such as "(", "[" or "%" must not break the lookup.
    return css:find("#" .. id, 1, true) == nil
  end

  --- Remove `hr` elements matched by the ".hline hr" selector from a row.
  -- @param row DOM element for a table row
  local function hline_hr(row)
    for _, hr in ipairs(row:query_selector(".hline hr")) do
      hr:remove_node()
    end
  end

  --- Remove the last row of a table when every `td` in it is empty.
  -- Used for longtable output, whose last row consists of empty cells.
  -- @param tbl DOM element for a table
  local function longtable_last_row(tbl)
    local rows = tbl:query_selector("tr")
    local last_row = rows[#rows]
    if not last_row then return end
    for _, cell in ipairs(last_row:query_selector("td")) do
      -- stop without removing anything as soon as a non-empty cell is found
      if has_child_elements(cell) or not cell:get_text():match("^%s*$") then
        return
      end
    end
    last_row:remove_node()
  end

  --- Read every file referenced by a `head link` element's `href`.
  -- Empty rows can be styled using CSS (the Booktabs configuration does
  -- that, for example), and such rows should not be removed.
  -- Files that cannot be opened are skipped silently.
  -- @return string with the contents of all readable files joined by "\n"
  local function load_css_files()
    local cssfiles = {}
    for _, link in ipairs(dom:query_selector("head link")) do
      local src = link:get_attribute("href")
      if src then
        local f = io.open(src, "r")
        if f then
          local contents = f:read("*all")
          f:close()
          table.insert(cssfiles, contents)
        end
      end
    end
    return table.concat(cssfiles, "\n")
  end

  local css = load_css_files()
  for _, tbl in ipairs(dom:query_selector("table")) do
    -- find and remove the empty, unstyled rows
    for _, row in ipairs(tbl:query_selector("tr")) do
      if is_empty_row(row) and is_not_styled(row, css) then
        row:remove_node()
      end
      hline_hr(row)
    end
    local tbl_class = tbl:get_attribute("class")
    if tbl_class and tbl_class:match("longtable") then
      longtable_last_row(tbl)
    end
  end
  return dom
end