Changeset - a97b3182afc5
[Not reviewed]
default
0 2 0
Nathan Brink (binki) - 15 years ago 2010-03-25 01:45:22
ohnobinki@ohnopublishing.net
better handle execio failure
2 files changed with 29 insertions and 11 deletions:
0 comments (0 inline, 0 general)
src/common/execio.c
Show inline comments
 
/*
 
  Copyright 2008 Nathan Phillip Brink, Ethan Zonca
 

	
 
  This file is a part of DistRen.
 

	
 
  DistRen is free software: you can redistribute it and/or modify
 
  it under the terms of the GNU Affero General Public License as published by
 
  the Free Software Foundation, either version 3 of the License, or
 
  (at your option) any later version.
 

	
 
  DistRen is distributed in the hope that it will be useful,
 
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
  GNU Affero General Public License for more details.
 

	
 
  You should have received a copy of the GNU Affero General Public License
 
  along with DistRen.  If not, see <http://www.gnu.org/licenses/>.
 
*/
 

	
 
#include "execio.h"
 

	
 
#include <unistd.h>
 
#include <sys/types.h>
 
#ifndef _WIN32
 
#include <sys/wait.h>
 
#endif
 
#include <signal.h>
 
#include <malloc.h>
 
#include <fcntl.h>
 
#include <stdio.h>
 
#include <errno.h>
 

	
 
int execio_open(struct execio **rem, const char *progname, char *const argv[])
 
{
 
  /* pipe used to write to child */
 
  int pipe_write[2];
 
  /* pipe used to read from child */
 
  int pipe_read[2];
 

	
 
  pid_t child;
 

	
 
  /* for wait(2) if needed */
 
  int childstatus;
 
  
 
  int counter;
 
  int counter2;
 
  int maxfds;
 

	
 
  /* create two pipes to facilitate communication with child */
 
  if(pipe(pipe_write))
 
    return 1;
 
  if(pipe(pipe_read))
 
    {
 
      close(pipe_write[0]);
 
      close(pipe_write[1]);
 
      return 1;
 
    }
 
  
 
  /* parent */
 
  child = fork();
 
  if(child == -1)
 
    {
 
      close(pipe_write[0]);
 
      close(pipe_write[1]);
 
      close(pipe_read[0]);
 
      close(pipe_read[1]);
 
      return 1;
 
    }
 
  if(child)
 
    /* the parent proc: */
 
    {
 
      /* close sides of pipe we won't use */
 
      close(pipe_write[0]);
 
      close(pipe_read[1]);
 
      
 
      /* setup execio struct */
 
      (*rem) = malloc(sizeof(struct execio));
 
      fprintf(stderr, "Allocated rem at %d\n", rem);
 
      if(!(*rem))
 
	{
 
	  /* we should tell the child we're dead - use wait and close our end of the pipes! */
 
	  close(pipe_write[1]);
 
	  close(pipe_read[0]);
 
	  /* we should probably pass of the wait() call to a thread that just does boring things like that. Especially for when the server tries to connect to other servers... */
 
	  /* maybe we should just kill instead of term the child */
 
	  kill(child, SIGTERM);
 
	  /* the waitpid(2) seems to indicate that only when the child is terminated will this wait return. */
 
	  waitpid(child, &childstatus, 0); 
 
	}
 
      (*rem)->pipe_write = pipe_write[1];
 
      (*rem)->pipe_read = pipe_read[0];
 
      (*rem)->state = 0;
 
      (*rem)->child = child;
 
      
 
      return 0;
 
    }
 
  
 
  /* child */
 
  else
 
    {
 
      /* close unused pipes */
 
      close(pipe_write[1]);
 
      close(pipe_read[0]);
 

	
 
      /*
 
	reset stdin, stdout, and stderr to the appropriate files. OK, not stderr :-) 
 
      */
 
      dup2(pipe_read[1], STDOUT_FILENO);
 
      dup2(pipe_write[0], STDIN_FILENO);
 
      /*
 
	close the fds that were dup'd
 
       */
 
      close(pipe_read[1]);
 
      close(pipe_write[0]);
 

	
 
      /* 
 
	 close all other file descriptors. We want to keep 0, 1, and 2 open. We don't know that the last open() or pipe() always gives the highest fd number. However, I will assume that it does. Maybe this is a bad idea:
 
       */
 
      counter = pipe_write[0];
 
      if(counter < pipe_write[1])
 
	counter = pipe_write[1];
 
      if(counter < pipe_read[0])
 
	counter = pipe_read[0];
 
      if(counter < pipe_read[1])
 
	counter = pipe_read[1];
 
      counter2 = 0;
 
      maxfds = counter;
 
      while(counter > 2)
 
	{
 
	  if(!close(counter))
 
	    counter2 ++; /* record how many descriptors we still had open :-) */
 
	  counter --;
 
	}
 
      
 
      /* stderr is the only stream we haven't confiscated atm - just for fun - I will confiscate it later, though, to support parsing error messages */
 
      fprintf(stderr, "closed %d/%d fds before execing \"%s\"\n", counter2, maxfds, progname);
 

	
 
      /*
 
	now exec: execvp uses interpreter to find the file to exec
 
       */
 
      execvp(progname, argv);
 

	
 
      return 1; /* this line should never be reached because we exec -- unless if the exec returns something bad. Then we'd have to tell execio over the pipe about that somehow... */
 
      /* in fact, maybe we should abort() here because if we returned, a monster of a distren client would exist! */
 
      fprintf(stderr, "uh-oh, ``%s'' didn't start for execio\n", progname);
 
      exit(1);
 
    }
 
}
 

	
 
/*
 
  returns 1 if child has exited, 
 
  returns 0 if child is still alive
 
 */
 
int _execio_checkpid(struct execio *eio)
 
{
 
  int childstatus;
 
  
 
#ifdef _WIN32
 
  waitpid(eio->child, &childstatus, 0);
 
#else
 
  waitpid(eio->child, &childstatus, WNOHANG);
 
#endif
 
  /* perror()? */
 

	
 
  return WIFEXITED(childstatus);
 
}
 

	
 

	
 
int execio_read(struct execio *eio, void *buf, size_t len, size_t *bytesread)
 
{
 
  ssize_t ret;
 
  /*
 
    TODO: detect NULL eio? 
 
    TODO: errno?
 
    update status of eio for execio_status/to be able to cleanup subproc??
 

	
 
    whenever read() returns 0, it means EOF
 
   */
 
  (*bytesread) = read(eio->pipe_read, buf, len);
 
  if(!*bytesread)
 
  
 
  ret = read(eio->pipe_read, buf, len);
 
  if(ret == -1)
 
    {
 
      (*bytesread) = 0;
 
      perror("read");
 
      switch(errno)
 
	{
 
	case EAGAIN:
 
	case EINTR:
 
	  return 0;
 
	  break;
 
	default:
 
	  return 1;
 
	}
 
    }
 

	
 
  (*bytesread) = (size_t)ret;
 
  if(!ret)
 
    {
 
      /* should also be able to figure out if is bad fd and should set EXECIO_STATE_ERROR instead of _EOF */
 
      eio->state = EXECIO_STATE_EOF;
 
      return 1;
 
    }
 

	
 
  return 0;
 
}
 

	
 
int execio_write(struct execio *eio, void *buf, size_t len, size_t *bytesread)
 
{
 
  errno = 0;
 
  (*bytesread) = write(eio->pipe_write, buf, len);
 
  if(!*bytesread)
 
    {
 
      switch(errno)
 
	{
 
	case EPIPE:
 
	  /* 
 
	     the program closed the pipe (died)
 
	  */
 
	fprintf(stderr, "execio_write: the child program closed its stdin pipe\n");
 
	eio->state = EXECIO_STATE_EOF;
 
	break;
 
	
 
	default:
 
	  fprintf(stderr, "execio_write: unhandled error writing to an fd: \n");
 
	  perror("write");
 
	  eio->state = EXECIO_STATE_ERROR;
 
	  break;
 
	  
 
	}
 
      
 
      return 1;
 
    }
 

	
 
  return 0;
 
}
 

	
 

	
 
enum execio_state execio_state(struct execio *eio)
 
{
 
  return eio->state;
 
}
 

	
 

	
 
int execio_close(struct execio *eio)
 
{
 
  int childstatus;
 

	
 
  close(eio->pipe_read);
 
  close(eio->pipe_write);
 

	
 
  /* maybe we should just kill rather than term the child */
 
  kill(eio->child, SIGTERM);
 
  /* 
 
     the waitpid(2) seems to indicate that only when the child is terminated will this wait return. 
 
     This are of code will probably need improving - the ability to seng SIGKILL after a timeout? So we'll output a debug line before running waitpid
 
  */
 
  fprintf(stderr, "execio_close: running waitpid\n");
 
  waitpid(eio->child, &childstatus, 0);
 

	
 
  free(eio);
 
  
 
  return 0;
 
}
 

	
src/server/slavefuncs.c
Show inline comments
 
@@ -223,198 +223,200 @@ int curlpost(char *filename, char *url, 
 
  curl_formadd(&formpost,
 
               &lastptr,
 
               CURLFORM_COPYNAME, "submit",
 
               CURLFORM_COPYCONTENTS, "send",
 
               CURLFORM_END);
 

	
 
  curl = curl_easy_init();
 
  headerlist = curl_slist_append(headerlist, buf);
 
  if(curl) {
 
    /* Setting the URL to get the post, and the contents of the post */
 
    curl_easy_setopt(curl, CURLOPT_URL, url);
 
    curl_easy_setopt(curl, CURLOPT_HTTPPOST, formpost);
 
    res = curl_easy_perform(curl);
 

	
 
    curl_easy_cleanup(curl);
 
    /* cleanup the formpost junk */
 
    curl_formfree(formpost);
 
    curl_slist_free_all (headerlist);
 
    free(sjobnum);
 
    free(sframenum);
 
    free(sslavekey);
 
  }
 
  return res;
 
}
 

	
 

	
 
/** Logs the user into the server after ensuring that keys exist */
 
int login_user(char *username)
 
{
 
  // @TODO: Put some telnet-style auth code here unless this is obselete
 
  return 1; // success
 
}
 

	
 
/** Replaces wordtoreplace with replacewith in conffile (relative to SYSCONFDIR) */
 
int conf_replace(char *conffile, char *wordtoreplace, char *replacewith){
 
  int maxlinelen = 120;
 
  char *fileOrig;
 
  char *fileRepl;
 
  _distren_asprintf(&fileOrig, "%s/%s", SYSCONFDIR, conffile);
 
  _distren_asprintf(&fileRepl, "%s%s.edited", SYSCONFDIR, conffile);
 
  char buffer[maxlinelen+2];
 
  char *buff_ptr, *find_ptr;
 
  FILE *fp1, *fp2;
 
  size_t find_len = strlen(wordtoreplace);
 
  fp1 = fopen(fileOrig,"r");
 
  fp2 = fopen(fileRepl,"w");
 
  if (fp1 ==NULL){
 
    fprintf(stderr, "%s doesn't exist\n",fileOrig);
 
    return 0;
 
  }
 
  else if(fp2 ==NULL){
 
    fprintf(stderr, "Can't write a file to disk! Check permissions.\n");
 
    return 0;
 
  }
 
  else{
 
    while(fgets(buffer,maxlinelen+2,fp1))
 
      {
 
        buff_ptr = buffer;
 
        while ((find_ptr = strstr(buff_ptr,wordtoreplace)))
 
        {
 
           while(buff_ptr < find_ptr)
 
             fputc((int)*buff_ptr++,fp2);
 
           fputs(replacewith,fp2);
 
           buff_ptr += find_len;
 
         }
 
         fputs(buff_ptr,fp2);
 
      }
 
    rename(fileRepl, fileOrig);
 
  }
 
  fclose(fp2);
 
  fclose(fp1);
 
  fprintf(stderr,"Wrote conf file...\n");
 
return 1; // Success
 
}
 

	
 

	
 
/* Executors */
 

	
 
/** Executor function for Blender operations */
 
int exec_blender(char *input, char *output, int frame)
 
{
 
  int ret;
 
  char *frame_str;
 
  _distren_asprintf(&frame_str, "%i", frame);
 

	
 
  char *command = "blender"; // @TODO: We currently expect this to be in PATH
 
  char *cmd[] = { command, "-b", input, "-o", output, "-f", frame_str, "-t", "0", (char *)NULL }; // arguments for blender
 

	
 
  fprintf(stderr,"Preparing to execute command: %s -b %s -o %s -f %s\n", command, input, output, frame_str);
 

	
 
  char buf[20];
 
  struct execio *testrem;
 
  size_t readlen;
 

	
 
  fprintf(stderr,"Executing: %s\n", frame_str);
 
  ret = execio_open(&testrem, command, cmd);
 
  buf[19] = '\0';
 
  while(!execio_read(testrem, buf, 20, &readlen))
 
  if(ret)
 
    {
 
      if(readlen > 20) {
 
        fprintf(stderr, "Something is terribly wrong!\n");
 
      fprintf(stderr, "error running blender\n");
 
      return 1;
 
       }
 
  buf[19] = '\0';
 
  while(!execio_read(testrem, buf, 19, &readlen))
 
    {
 
       buf[readlen] = '\0';
 
       fprintf(stderr, "read \"%s\"\n", buf);
 
    }
 
  execio_close(testrem);
 
  free(frame_str);
 
  return ret;
 
}
 

	
 
void xmlinit()
 
{
 
  xmlInitParser();
 
  xmlXPathInit();
 
}
 

	
 
void xmlcleanup()
 
{
 
  xmlCleanupParser();
 
}
 

	
 

	
 
/** Creates directories recursively */
 
int distren_mkdir_recurse(char *dirname)
 
{
 
  size_t counter;
 
  char *nextdir;
 

	
 
  nextdir = strdup(dirname);
 
  for(counter = 0; nextdir[counter]; counter ++)
 
    {
 
      /** @TODO OS-portabalize the path-separators */
 
      if(nextdir[counter] == '/')
 
        {
 
          nextdir[counter] = '\0';
 
          mkdir(nextdir, S_IRWXU | S_IRGRP | S_IROTH);
 
          nextdir[counter] = '/';
 
        }
 
    }
 

	
 
  return 0;
 
}
 

	
 
/**
 
 @TODO: Use for constructing path to job data locally and/or on data.distren.org
 
 */
 
int job_build_path(char *filename, unsigned int jobnum)
 
{
 
  return 0;
 
}
 

	
 

	
 
int downloadTar(char *url, char *destinationPath){
 
  // Prepare to download the job tar if it isn't  present
 
  struct stat buffer;
 
  int fstatus = stat(destinationPath, &buffer);
 
  if(fstatus == -1)
 
    {
 
      // Download the Tar
 
      int ret = curlget(url, destinationPath);
 
      if(ret == 0){
 
        fprintf(stderr, "Job data retrieved successfully\n");
 
        // free(url); @FIXME: causes doublefree! Curl must free the url?
 
        return 0;
 
      }
 
      else if(ret == 1){
 
        fprintf(stderr, "Downloading job data from %s failed. Check your network connection.\n",url);
 
        free(url);
 
        return 1; // Eventually make a retry loop
 
      }
 
    }
 
  fprintf(stderr, "Tar already exists! Download cancelled.\n");
 
  return 2;
 
}
 

	
 

	
 
int uploadOutput(char *pathtoOutput, char *urltoOutput, int jobnum, int framenum, int slavekey){
 
  //fprintf(stderr,"Uploading output %s to url %s for job %d and frame %d for slavekey %d", pathtoOutput, urltoOutput, jobnum, framenum, slavekey);
 
  if( !curlpost(pathtoOutput, urltoOutput, jobnum, framenum, slavekey)) // Uploads output
 
    {
 
      fprintf(stderr,"Upload successful, removing old output...\n");
 
      remove(pathtoOutput); // Delete the file after its uploaded
 
      return 0;
 
    }
 
  else
 
    {
 
      fprintf(stderr,"Upload failed. Check your network connection. Retrying upload...\n");
 
      int tries=1;
 
      while(tries<=10 && curlpost(pathtoOutput, urltoOutput, jobnum, framenum, slavekey))
 
        {
 
          fprintf(stderr, "Upload failed. Trying again in 10 seconds... (attempt %d of 10)\n", tries);
 
          tries++;
 
          sleep(10);
 
        }
 
      return 1; // Upload failed after multiple tries
 
      // @FUTURE: Keep track of files that we were unable to upload, and upload them later
 
    }
 
}
 
@@ -725,124 +727,121 @@ int _web_getwork(int slavekey, char *sla
 
  struct _web_memorystruct data = _web_getrequest(url);
 
  free(url);
 

	
 
  if(!data.memory || !strcmp(data.memory,",")){
 
    fprintf(stderr,"No work available on server!\n");
 
    return 0;
 
  }
 
  else if(!strcmp(data.memory, "ERROR_BADKEY")){
 
    fprintf(stderr,"*** Slave %d does not exist!\n",slavekey);
 
    free(data.memory);
 
    return 0;
 
  }
 
  // Compare to PACKAGE_VERSION
 
  else{
 
    char *tmp;
 
    char *serverversion;
 

	
 
    tmp = strtok (data.memory,",,");
 
    if(tmp != NULL){ // make sure work is available
 
      *jobnum = atoi(tmp);
 
      tmp = strtok (NULL, ",");
 
      if(tmp == NULL)
 
        return 0; // no work
 
      *framenum = atoi(tmp);
 
      tmp = strtok (NULL, ",");
 
      if(tmp == NULL)
 
        return 0; // no work
 
      serverversion = tmp;
 
      if(strcmp(PACKAGE_VERSION,serverversion)){
 
        fprintf(stderr,"Your distren package is out of date! Please acquire a newer version. (%s local vs %s remote)\n", PACKAGE_VERSION, serverversion);
 
        return 0;
 
      }
 
      free(data.memory);
 
      return 1;
 
    }
 
    else
 
      return 0; // error
 
  }
 
}
 

	
 
void _web_setrenderpower(int slavekey, char *slavepass, int renderpower){
 
  fprintf(stderr,"Setting render power on server... ");
 
  char *url;
 
  _distren_asprintf(&url,"http://dev.distren.org/slave/act.php?mode=setrenderpower&slavekey=%d&slavepass=%s&renderpower=%d", slavekey, slavepass, renderpower);
 
  struct _web_memorystruct data = _web_getrequest(url);
 
  free(url);
 

	
 
  fprintf(stderr,"%s\n", data.memory);
 
  if(data.memory)
 
    free(data.memory);
 
}
 

	
 
int slaveBenchmark(char *datadir, int *benchmarkTime, int *renderPower){
 
  int ret;
 
  int frameToRender = 1;
 
  char *frame_str;
 
  _distren_asprintf(&frame_str, "%d", frameToRender); // Render frame 1
 

	
 
  char *output;
 
  _distren_asprintf(&output, "%s/benchmark#.jpg", datadir); // Where to save benchmark output
 

	
 
  char *realOutput;
 
  _distren_asprintf(&realOutput, "%s/benchmark%d.jpg", datadir, frameToRender); // Where to save benchmark output
 

	
 

	
 
  char *input;
 
  _distren_asprintf(&input, "%s/benchmark.blend", datadir); // Input file
 

	
 
  fprintf(stderr,"Downloading benchmark data...\n");
 
  curlget("http://data.distren.org/benchmark.blend", input); // Download to input file location
 

	
 
  char *command = "blender"; // @TODO: We currently expect this to be in PATH
 
  char *cmd[] = { command, "-b", input, "-o", output, "-f", frame_str, "-t", "0", (char *)NULL }; // arguments for blender
 

	
 
  // fprintf(stderr,"Preparing to execute command: %s -b %s -o %s -f %s\n", command, input, output, frame_str);
 
  fprintf(stderr,"Running benchmark...\n");
 

	
 
  long startTime;
 
  long endTime;
 

	
 
  time(&startTime);
 

	
 
  char buf[20];
 
  struct execio *testrem;
 
  size_t readlen;
 

	
 
  ret = execio_open(&testrem, command, cmd);
 
  if(ret)
 
    {
 
      fprintf(stderr, "Error executing blender\n");
 
      return 1;
 
    }
 

	
 
  buf[19] = '\0';
 
  while(!execio_read(testrem, buf, 19, &readlen))
 
    {
 
      if(readlen > 20) {
 
        fprintf(stderr, "Something is terribly wrong!\n");
 
       }
 
       buf[readlen] = '\0';
 
       fprintf(stderr, "read \"%s\"\n", buf);
 
    }
 
  execio_close(testrem);
 

	
 
  time(&endTime);
 

	
 
  struct stat buffer;
 
  int ostatus = stat(realOutput, &buffer);
 
  if(ostatus == -1){
 
    ret = 1; // Return error if output wasn't generated
 
  }
 
  else
 
    remove(output);
 

	
 
  *benchmarkTime = abs(difftime(startTime,endTime));
 
  float tmp = *benchmarkTime;
 
  tmp = (1/tmp) * 50000;
 
  *renderPower = (int)tmp;
 
  free(realOutput);
 
  free(frame_str);
 
  free(input);
 
  free(output);
 
  return ret;
 
}
0 comments (0 inline, 0 general)