read.px C stack usage issue

read.px C stack usage issue

Problem Description:

I am following the tutorial here to practice rayshader. However, when I used the code below I get this error:

Error: C stack usage  17812428 is too close to the limit

In the tutorial, when I reach the step containing the code posted below, I get this error. Sample data is provided in the tutorial.

Code:

tbl_census_2018 <- read.px("data/census_2018.px") %>%              # Load & format
  as_tibble()

Package pxR's GitHub page has more information about the read.px function, which I am pasting below in case it helps.

How can I fix this?

#################################################################
# 
# File:         read.px.R
# Purpose:      reads a PC-Axis file into R
#
# Created:      20110618
# Authors:      fvf, cjgb, opl
#
# Modifications: 
#       20111210, cjgb: in the data string, "-" may represent the value 0
#       20111210, cjgb: fixing the strsplit when the split character is contained in the data part
#       20120329, cjgb: number strings in the DATA part can contain ";" as separators.
#               Although deprecated, cases still lurk.
#               20130228, cjgb: There can be ; inside quoted strings that create chaos
#               20130608   fvf: Ability to read files with keys in data area.
#                               ":"  added to defaut na.string (EuroStat files)
#               20130624:  use str_split (line 91) to read DATA area
#               20130917, cjgb: changes to prevent errors with EOL characteres
#               20131115, cjgb: some files do not have heading (or stub): only one of 
#                               them is really required
#               20131118, cjgb: fixed a bug happening when missing (i.e. "..") was the last value in DATA
#                               fixing it required that the last quote was not eliminated (same for first quote)
#               20141222, fvf:  fixing some bug in relation to read files with KEYS (sparse array)
#               20150211, fvf:  The parameter "encoding" is NULL by default. "encoding" is determined by 
#                               the file itself: if CHARSET="ANSI" then "latin1" else "CP437".
#               20150212. fvf:  I have to delete => 20130917, cjgb:  tmp[2] <- gsub(";.*", "", tmp[2])    
#                               many px-files have a semicolon at the end of line in DATA area:
#                               i.e: read.px('http://www.ine.es/pcaxisdl//t20/e245/p05/a2002/l0/00004001.px')
#               20150216. fvf   minor correction of a bug in the modification: 20150211,fvf
#               20150219. fvf   Solving a bug: a missing "DROP=FALSE" was producing a read error on files with a single key
#################################################################

# Read a PC-Axis (.px) file into an R "px" object.
#
# Args:
#   filename:   path or URL of the PC-Axis file.
#   encoding:   character encoding of the file; if NULL it is inferred from
#               the file's CHARSET keyword ("latin1" when CHARSET=ANSI,
#               "CP437" otherwise).
#   na.strings: strings in the DATA part to be interpreted as NA.
#
# Returns: a list with class "px"; its DATA$value element holds a
#          'melted' data.frame of the data area.
#
# NOTE(review): the escape sequences in this file had been stripped
# (e.g. sep = "n", "\1;\2") — they are restored below ("\n", "\\1;\\2"),
# which is the likely cause of the reported failure when the code was
# copied from the web page.
read.px <- function(filename, encoding = NULL, 
                    na.strings = c('"."', '".."', '"..."', '"...."', '"....."', '"......"', '":"')) {

    ## auxiliary functions ##

    clean.spaces <- function(x){
        gsub("^[[:space:]]+|[[:space:]]+$", "", x) # discards heading|trailing whitespace
    }

    get.attributes <- function(x){
        # Separate "LABEL(attribute)" into "LABEL;attribute".
        # Backslashes restored: "\\(" escapes the literal parenthesis and
        # "\\1;\\2" are the backreferences (the unescaped forms "\(" / "\1"
        # are invalid/meaningless escapes in an R string).
        x <- gsub( "([A-Z-]*)\\((.*)\\).*", "\\1;\\2", x ) ## separates label-attribute with ";"
        x <- ldply(strsplit(x, ";"), 
                   function(y) c(y, "value")[1:2])
    }

    break.clean <- function(x) {
        x <- clean.spaces( strsplit(x, split = '\"')[[1]] )    ## breaks by '"'
        x[! x %in% c("," , "")]                                 ## and drops spurious seps
    }


    ## end: auxiliary functions ##

    # fvf (150211): determine the character encoding used by the file itself
    if (is.null(encoding)) {
        charset  <- readLines(filename, 5)   # read the first five lines
        encoding <- ifelse(any(grepl('CHARSET.*ANSI', charset, ignore.case = TRUE)), 
                           "latin1", "CP437")  # verified on debian and osx
    }

    # "\n" (restored from mangled "n") keeps each physical line as one element
    a <- scan(filename, what = "character", sep = "\n", quiet = TRUE, fileEncoding = encoding)

    # fvf: 130608 — keep the CR/LF for now; they are removed selectively later
    a <- paste(a, collapse = "\n")

    tmp <- strsplit( a, "DATA=" )[[1]]
    tmp[1] <- gsub("\n", " ", tmp[1])     # fvf[130608]: remove CR from the header
    tmp[2] <- gsub(";", "", tmp[2])       # fvf[150212]: many px files put ";" before
                                          # EOL on every DATA line (e.g. INEBase files);
                                          # stripping them avoids reading only the
                                          # first data line
    a <- paste(tmp[1], "DATA=", tmp[2], sep = "")

    ## modification by cjgb, 20130228 concerning line separators within quoted strings
    ## ";" is the logical line end in px files, so ideally:
    ##   a <- unlist(strsplit(a, ";"))
    ## but there might be ";" inside quoted strings, hence this workaround:

    punto.coma <- str_locate_all(a, ";")[[1]][,1]   # positions of the ";"
    comillas   <- str_locate_all(a, '"')[[1]][,1]   # positions of the '"'

    ## keep only the ";" not preceded by an odd number of '"':
    ## those are the proper logical-line cuts
    cortes     <- Filter( function(x) sum(comillas < x) %% 2 == 0, punto.coma )     

    a <- str_sub(a, c(1, cortes + 1), c(cortes - 1, str_length(a)))
    a <- a[!is.na(a)]
    a <- a[a != ""]

    ## end of modification by cjgb, 20130228

    # str_split (not strsplit) avoids "Error: C stack usage is too close
    # to the limit" on big px files
    a <- do.call(rbind, str_split(a, "=", n = 2))   

    ## fvf.20141222: do not convert to factor: stringsAsFactors = FALSE
    a <- data.frame(cbind(get.attributes(a[, 1]), a[, 2], stringsAsFactors=F))

    colnames(a) <- c("label", "attribute", "value")

    ## build a px object: a list with class "px" ##

    a$label     <- make.names(clean.spaces(a$label))
    a$attribute <- make.names(clean.spaces(gsub('\"', "", a$attribute)))

    # avoid stripping quotes in the DATA part: a reported bug showed that when
    # the data ended in "..", erasing the final quote broke the scan() below
    a.data                     <- as.character(a[a$label == "DATA", "value"])
    a.value                    <- gsub('^\"|\"$', "", a$value)   # removes " at beginning / end
    a.value[a$label == "DATA"] <- a.data
    names(a.value)             <- a$attribute

    px <- tapply(a.value, a$label, as.list)    

    ## these metadata keys contain comma-separated vectors;
    ## split them and clean up extra spaces etc.
    px$STUB$value    <- if(!is.null(px$STUB))    make.names(break.clean(px$STUB$value))
    px$HEADING$value <- if(!is.null(px$HEADING)) make.names(break.clean(px$HEADING$value))

    px$VALUES <- lapply(px$VALUES, break.clean)

    # fvf.20141222: if there are no CODES, do not create CODES
    if (!is.null(px$CODES))
       px$CODES <- lapply(px$CODES, break.clean)

    # fvf.20141222: replace ["~~~~" "~~~~~"] by ["~~~~~"\n"~~~~"] in multi-line
    # fields with lost line breaks (makes them human-readable again)
    px <- lapply(px, function(e){
      if (!is.null(e$value)) 
        e$value <- gsub('"[[:space:]]+"', '"\n"', e$value)          
      e
    })

    #### read the data part into a 'melted' dataframe ###

    ## two cases — files with / without the KEYS keyword —
    ## which need to be processed independently

    # fvf[130608]: added to read files with keys in the data area

    if ("KEYS" %in% a$label ){

      ## read the whole block
      tc <- textConnection(px$DATA$value); on.exit( close(tc) )
      raw <- read.table(tc, sep = ",", colClasses = "factor")

      ## extract and process the data part (the numbers)
      data.part <- as.character(raw[, ncol(raw)] )          # numbers (last column of the data.frame)
      data.part <- gsub('"-"', 0, data.part)                # 0's might be encoded as "-"
      data.part <- scan(text = data.part, na.strings = na.strings, quiet = TRUE)

      ## extract and process the keys part (each row is stacked as many times
      ## as there are data entries per row in the block)
      keys.part <- raw[, -ncol(raw), drop = FALSE]    
      keys.part <- keys.part[ rep(1:nrow(keys.part), each = length(data.part) / nrow(keys.part) ), , drop = FALSE ]
      colnames(keys.part) <- names(px$KEYS)

      ## change CODES (if any) in keys part to VALUES (consistency issue)
      # fvf.20141222:
      for (col.name in colnames(keys.part)){
        if (px$KEYS[[col.name]] == 'CODES')   {            
          keys.part[[col.name]]  <- factor(keys.part[[col.name]], levels = px$CODES[[col.name]]) 
          levels(keys.part[[col.name]]) <- px$VALUES[[col.name]]  ## all levels as VALUES
        } else  keys.part[[col.name]]  <- factor(keys.part[[col.name]], levels = px$VALUES[[col.name]] )           
      }


      ## extract and process the variables that are not keys
      no.keys.part <- px$VALUES[setdiff(names(px$VALUES), names(px$KEYS))]                       
      no.keys.part <- expand.grid(rev(no.keys.part))

      ## put everything together & clean up
      px$DATA$value <- data.frame( keys.part, 
                                   no.keys.part, 
                                   value = data.part,
                                   row.names = NULL)
    }  
    else
    {
      tmp <- gsub('"-"', 0, px$DATA$value)        # 0 can be encoded as "-"
      tmp <- gsub("\n", " ", tmp)                 # delete CR/LF of DATA area fvf[130608]

      tc  <- textConnection(tmp); on.exit( close(tc) )
      raw <- scan(tc, na.strings = na.strings, quote = NULL, quiet = TRUE)

      names.vals <- c( rev(px$HEADING$value), rev( px$STUB$value ) )
      output.grid <- data.frame(do.call(expand.grid, px$VALUES[names.vals]))

      # sanity check: avoids silent "recycling" of values when the ratio of
      # the lengths of variables and values happens to be an exact integer
      if (nrow(output.grid) != length(raw))
        stop( "The input file is malformed: data and varnames length differ" )

      px$DATA$value           <- data.frame(output.grid, raw)
      colnames(px$DATA$value) <- c(names.vals, "value")    

    }

    class(px) <- "px"
    px
}

Solution – 1

The issue should have been fixed with the new code release (version 0.42.6).

Rate this post
We use cookies in order to give you the best possible experience on our website. By continuing to use this site, you agree to our use of cookies.
Accept
Reject